import pandas as pd
import numpy as np
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.tree import plot_tree
from sklearn.model_selection import learning_curve
Dataset links are given below:
The merged dataset CSV will be uploaded in the Dropbox submission.
# Load the merged AAI dataset from the working directory.
# Per the shape printed later in this notebook, it holds 217 rows (countries)
# and 309 columns: Country Name/Code, Region, IncomeGroup, plus yearly
# indicator columns such as "1990(E)", "1990(CO2)", "2022(Population annual growth)".
df = pd.read_csv("AAI Updated Dataset.csv")
# Display the full DataFrame (notebook cell output).
df
| Country Name | Country Code | Region | IncomeGroup | 1990(E) | 1991(E) | 1992(E) | 1993(E) | 1994(E) | 1995(E) | ... | 2013(Population annual growth) | 2014(Population annual growth) | 2015(Population annual growth) | 2016(Population annual growth) | 2017(Population annual growth) | 2018(Population annual growth) | 2019(Population annual growth) | 2020(Population annual growth) | 2021(Population annual growth) | 2022(Population annual growth) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Aruba | ABW | Latin America & Caribbean | High income | 100.0 | 99.153656 | 99.197128 | 99.239914 | 100.000000 | 100.000000 | ... | 0.749301 | 0.691615 | 0.637959 | 0.590062 | 0.537296 | 0.494795 | 0.451970 | 0.134255 | -0.045045 | -0.086392 |
| 1 | Afghanistan | AFG | South Asia | Low income | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 3.466788 | 3.657576 | 3.121341 | 2.581549 | 2.866492 | 2.885208 | 2.908529 | 3.134747 | 2.851358 | 2.534498 |
| 2 | Angola | AGO | Sub-Saharan Africa | Lower middle income | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 3.735525 | 3.684429 | 3.617678 | 3.586211 | 3.550987 | 3.464457 | 3.395278 | 3.268348 | 3.166030 | 3.096753 |
| 3 | Albania | ALB | Europe & Central Asia | Upper middle income | 100.0 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | ... | -0.183211 | -0.207047 | -0.291206 | -0.159880 | -0.091972 | -0.246732 | -0.426007 | -0.574207 | -0.926918 | -1.215790 |
| 4 | Andorra | AND | Europe & Central Asia | High income | 100.0 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | ... | 0.497262 | 0.355275 | 0.174378 | 1.100603 | 1.772183 | 1.580147 | 1.757491 | 1.761891 | 1.702288 | 0.994607 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 212 | Kosovo | XKX | Europe & Central Asia | Upper middle income | 100.0 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | ... | 0.607468 | -0.294474 | -1.364932 | -0.596734 | 0.753585 | 0.339011 | -0.457730 | 0.070131 | -0.229016 | -1.355874 |
| 213 | Yemen, Rep. | YEM | Middle East & North Africa | Low income | 0.0 | 0.000000 | 44.100000 | 40.774715 | 41.985134 | 43.193512 | ... | 2.859237 | 2.811072 | 2.712955 | 2.621537 | 2.564321 | 2.486360 | 2.426208 | 2.310447 | 2.137790 | 2.144628 |
| 214 | South Africa | ZAF | Sub-Saharan Africa | Upper middle income | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 1.361621 | 1.576294 | 2.074017 | 0.972004 | 0.387278 | 1.225530 | 1.295074 | 1.223179 | 0.998920 | 0.841058 |
| 215 | Zambia | ZMB | Sub-Saharan Africa | Lower middle income | 13.9 | 12.752703 | 19.200000 | 14.213207 | 14.941759 | 15.668272 | ... | 3.271299 | 3.247118 | 3.191896 | 3.147407 | 3.113595 | 3.061888 | 3.007618 | 2.933818 | 2.840806 | 2.758032 |
| 216 | Zimbabwe | ZWE | Sub-Saharan Africa | Lower middle income | 0.0 | 0.000000 | 28.200000 | 31.163668 | 28.100000 | 31.948580 | ... | 2.163267 | 2.191391 | 2.136294 | 2.081806 | 2.043620 | 2.020537 | 1.989253 | 2.031112 | 2.045715 | 2.024036 |
217 rows × 309 columns
# List every column name so the year/indicator naming scheme is visible.
print(df.columns.tolist())
['Country Name', 'Country Code', 'Region', 'IncomeGroup', '1990(E)', '1991(E)', '1992(E)', '1993(E)', '1994(E)', '1995(E)', '1996(E)', '1997(E)', '1998(E)', '1999(E)', '2000(E)', '2001(E)', '2002(E)', '2003(E)', '2004(E)', '2005(E)', '2006(E)', '2007(E)', '2008(E)', '2009(E)', '2010(E)', '2011(E)', '2012(E)', '2013(E)', '2014(E)', '2015(E)', '2016(E)', '2017(E)', '2018(E)', '2019(E)', '2020(E)', '2021(E)', '1960(EG)', '1961(EG)', '1962(EG)', '1963(EG)', '1964(EG)', '1965(EG)', '1966(EG)', '1967(EG)', '1968(EG)', '1969(EG)', '1970(EG)', '1971(EG)', '1972(EG)', '1973(EG)', '1974(EG)', '1975(EG)', '1976(EG)', '1977(EG)', '1978(EG)', '1979(EG)', '1980(EG)', '1981(EG)', '1982(EG)', '1983(EG)', '1984(EG)', '1985(EG)', '1986(EG)', '1987(EG)', '1988(EG)', '1989(EG)', '1990(EG)', '1991(EG)', '1992(EG)', '1993(EG)', '1994(EG)', '1995(EG)', '1996(EG)', '1997(EG)', '1998(EG)', '1999(EG)', '2000(EG)', '2001(EG)', '2002(EG)', '2003(EG)', '2004(EG)', '2005(EG)', '2006(EG)', '2007(EG)', '2008(EG)', '2009(EG)', '2010(EG)', '2011(EG)', '2012(EG)', '2013(EG)', '2014(EG)', '2015(EG)', '1990(CO2)', '1991(CO2)', '1992(CO2)', '1993(CO2)', '1994(CO2)', '1995(CO2)', '1996(CO2)', '1997(CO2)', '1998(CO2)', '1999(CO2)', '2000(CO2)', '2001(CO2)', '2002(CO2)', '2003(CO2)', '2004(CO2)', '2005(CO2)', '2006(CO2)', '2007(CO2)', '2008(CO2)', '2009(CO2)', '2010(CO2)', '2011(CO2)', '2012(CO2)', '2013(CO2)', '2014(CO2)', '2015(CO2)', '2016(CO2)', '2017(CO2)', '2018(CO2)', '2019(CO2)', '2020(CO2)', '1961(GDP annual growth)', '1962(GDP annual growth)', '1963(GDP annual growth)', '1964(GDP annual growth)', '1965(GDP annual growth)', '1966(GDP annual growth)', '1967(GDP annual growth)', '1968(GDP annual growth)', '1969(GDP annual growth)', '1970(GDP annual growth)', '1971(GDP annual growth)', '1972(GDP annual growth)', '1973(GDP annual growth)', '1974(GDP annual growth)', '1975(GDP annual growth)', '1976(GDP annual growth)', '1977(GDP annual growth)', '1978(GDP annual growth)', '1979(GDP annual growth)', 
'1980(GDP annual growth)', '1981(GDP annual growth)', '1982(GDP annual growth)', '1983(GDP annual growth)', '1984(GDP annual growth)', '1985(GDP annual growth)', '1986(GDP annual growth)', '1987(GDP annual growth)', '1988(GDP annual growth)', '1989(GDP annual growth)', '1990(GDP annual growth)', '1991(GDP annual growth)', '1992(GDP annual growth)', '1993(GDP annual growth)', '1994(GDP annual growth)', '1995(GDP annual growth)', '1996(GDP annual growth)', '1997(GDP annual growth)', '1998(GDP annual growth)', '1999(GDP annual growth)', '2000(GDP annual growth)', '2001(GDP annual growth)', '2002(GDP annual growth)', '2003(GDP annual growth)', '2004(GDP annual growth)', '2005(GDP annual growth)', '2006(GDP annual growth)', '2007(GDP annual growth)', '2008(GDP annual growth)', '2009(GDP annual growth)', '2010(GDP annual growth)', '2011(GDP annual growth)', '2012(GDP annual growth)', '2013(GDP annual growth)', '2014(GDP annual growth)', '2015(GDP annual growth)', '2016(GDP annual growth)', '2017(GDP annual growth)', '2018(GDP annual growth)', '2019(GDP annual growth)', '2020(GDP annual growth)', '2021(GDP annual growth)', '2022(GDP annual growth)', '1961(GDP per capita annual growth)', '1962(GDP per capita annual growth)', '1963(GDP per capita annual growth)', '1964(GDP per capita annual growth)', '1965(GDP per capita annual growth)', '1966(GDP per capita annual growth)', '1967(GDP per capita annual growth)', '1968(GDP per capita annual growth)', '1969(GDP per capita annual growth)', '1970(GDP per capita annual growth)', '1971(GDP per capita annual growth)', '1972(GDP per capita annual growth)', '1973(GDP per capita annual growth)', '1974(GDP per capita annual growth)', '1975(GDP per capita annual growth)', '1976(GDP per capita annual growth)', '1977(GDP per capita annual growth)', '1978(GDP per capita annual growth)', '1979(GDP per capita annual growth)', '1980(GDP per capita annual growth)', '1981(GDP per capita annual growth)', '1982(GDP per capita annual growth)', 
'1983(GDP per capita annual growth)', '1984(GDP per capita annual growth)', '1985(GDP per capita annual growth)', '1986(GDP per capita annual growth)', '1987(GDP per capita annual growth)', '1988(GDP per capita annual growth)', '1989(GDP per capita annual growth)', '1990(GDP per capita annual growth)', '1991(GDP per capita annual growth)', '1992(GDP per capita annual growth)', '1993(GDP per capita annual growth)', '1994(GDP per capita annual growth)', '1995(GDP per capita annual growth)', '1996(GDP per capita annual growth)', '1997(GDP per capita annual growth)', '1998(GDP per capita annual growth)', '1999(GDP per capita annual growth)', '2000(GDP per capita annual growth)', '2001(GDP per capita annual growth)', '2002(GDP per capita annual growth)', '2003(GDP per capita annual growth)', '2004(GDP per capita annual growth)', '2005(GDP per capita annual growth)', '2006(GDP per capita annual growth)', '2007(GDP per capita annual growth)', '2008(GDP per capita annual growth)', '2009(GDP per capita annual growth)', '2010(GDP per capita annual growth)', '2011(GDP per capita annual growth)', '2012(GDP per capita annual growth)', '2013(GDP per capita annual growth)', '2014(GDP per capita annual growth)', '2015(GDP per capita annual growth)', '2016(GDP per capita annual growth)', '2017(GDP per capita annual growth)', '2018(GDP per capita annual growth)', '2019(GDP per capita annual growth)', '2020(GDP per capita annual growth)', '2021(GDP per capita annual growth)', '2022(GDP per capita annual growth)', '1961(Population annual growth)', '1962(Population annual growth)', '1963(Population annual growth)', '1964(Population annual growth)', '1965(Population annual growth)', '1966(Population annual growth)', '1967(Population annual growth)', '1968(Population annual growth)', '1969(Population annual growth)', '1970(Population annual growth)', '1971(Population annual growth)', '1972(Population annual growth)', '1973(Population annual growth)', '1974(Population annual growth)', 
'1975(Population annual growth)', '1976(Population annual growth)', '1977(Population annual growth)', '1978(Population annual growth)', '1979(Population annual growth)', '1980(Population annual growth)', '1981(Population annual growth)', '1982(Population annual growth)', '1983(Population annual growth)', '1984(Population annual growth)', '1985(Population annual growth)', '1986(Population annual growth)', '1987(Population annual growth)', '1988(Population annual growth)', '1989(Population annual growth)', '1990(Population annual growth)', '1991(Population annual growth)', '1992(Population annual growth)', '1993(Population annual growth)', '1994(Population annual growth)', '1995(Population annual growth)', '1996(Population annual growth)', '1997(Population annual growth)', '1998(Population annual growth)', '1999(Population annual growth)', '2000(Population annual growth)', '2001(Population annual growth)', '2002(Population annual growth)', '2003(Population annual growth)', '2004(Population annual growth)', '2005(Population annual growth)', '2006(Population annual growth)', '2007(Population annual growth)', '2008(Population annual growth)', '2009(Population annual growth)', '2010(Population annual growth)', '2011(Population annual growth)', '2012(Population annual growth)', '2013(Population annual growth)', '2014(Population annual growth)', '2015(Population annual growth)', '2016(Population annual growth)', '2017(Population annual growth)', '2018(Population annual growth)', '2019(Population annual growth)', '2020(Population annual growth)', '2021(Population annual growth)', '2022(Population annual growth)']
# Preview the first five rows (notebook cell output).
df.head()
| Country Name | Country Code | Region | IncomeGroup | 1990(E) | 1991(E) | 1992(E) | 1993(E) | 1994(E) | 1995(E) | ... | 2013(Population annual growth) | 2014(Population annual growth) | 2015(Population annual growth) | 2016(Population annual growth) | 2017(Population annual growth) | 2018(Population annual growth) | 2019(Population annual growth) | 2020(Population annual growth) | 2021(Population annual growth) | 2022(Population annual growth) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Aruba | ABW | Latin America & Caribbean | High income | 100.0 | 99.153656 | 99.197128 | 99.239914 | 100.0 | 100.0 | ... | 0.749301 | 0.691615 | 0.637959 | 0.590062 | 0.537296 | 0.494795 | 0.451970 | 0.134255 | -0.045045 | -0.086392 |
| 1 | Afghanistan | AFG | South Asia | Low income | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | ... | 3.466788 | 3.657576 | 3.121341 | 2.581549 | 2.866492 | 2.885208 | 2.908529 | 3.134747 | 2.851358 | 2.534498 |
| 2 | Angola | AGO | Sub-Saharan Africa | Lower middle income | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | ... | 3.735525 | 3.684429 | 3.617678 | 3.586211 | 3.550987 | 3.464457 | 3.395278 | 3.268348 | 3.166030 | 3.096753 |
| 3 | Albania | ALB | Europe & Central Asia | Upper middle income | 100.0 | 100.000000 | 100.000000 | 100.000000 | 100.0 | 100.0 | ... | -0.183211 | -0.207047 | -0.291206 | -0.159880 | -0.091972 | -0.246732 | -0.426007 | -0.574207 | -0.926918 | -1.215790 |
| 4 | Andorra | AND | Europe & Central Asia | High income | 100.0 | 100.000000 | 100.000000 | 100.000000 | 100.0 | 100.0 | ... | 0.497262 | 0.355275 | 0.174378 | 1.100603 | 1.772183 | 1.580147 | 1.757491 | 1.761891 | 1.702288 | 0.994607 |
5 rows × 309 columns
# Report (rows, columns) — expected (217, 309) for this dataset.
print("Dataset dimensions:", df.shape)
Dataset dimensions: (217, 309)
# Show per-column dtypes: the four identifier columns are object,
# all yearly indicator columns are float64 (per the output below).
print("\nData types:\n", df.dtypes)
Data types:
Country Name object
Country Code object
Region object
IncomeGroup object
1990(E) float64
...
2018(Population annual growth) float64
2019(Population annual growth) float64
2020(Population annual growth) float64
2021(Population annual growth) float64
2022(Population annual growth) float64
Length: 309, dtype: object
# Count missing entries per column; the dataset shows none (zeros throughout).
print("\nMissing values:\n", df.isna().sum())
Missing values:
Country Name 0
Country Code 0
Region 0
IncomeGroup 0
1990(E) 0
..
2018(Population annual growth) 0
2019(Population annual growth) 0
2020(Population annual growth) 0
2021(Population annual growth) 0
2022(Population annual growth) 0
Length: 309, dtype: int64
# Summary statistics for all numeric columns, rendered with a color
# gradient per column to highlight relative magnitudes (notebook display).
df.describe().style.background_gradient(cmap="twilight_shifted")
| 1990(E) | 1991(E) | 1992(E) | 1993(E) | 1994(E) | 1995(E) | 1996(E) | 1997(E) | 1998(E) | 1999(E) | 2000(E) | 2001(E) | 2002(E) | 2003(E) | 2004(E) | 2005(E) | 2006(E) | 2007(E) | 2008(E) | 2009(E) | 2010(E) | 2011(E) | 2012(E) | 2013(E) | 2014(E) | 2015(E) | 2016(E) | 2017(E) | 2018(E) | 2019(E) | 2020(E) | 2021(E) | 1960(EG) | 1961(EG) | 1962(EG) | 1963(EG) | 1964(EG) | 1965(EG) | 1966(EG) | 1967(EG) | 1968(EG) | 1969(EG) | 1970(EG) | 1971(EG) | 1972(EG) | 1973(EG) | 1974(EG) | 1975(EG) | 1976(EG) | 1977(EG) | 1978(EG) | 1979(EG) | 1980(EG) | 1981(EG) | 1982(EG) | 1983(EG) | 1984(EG) | 1985(EG) | 1986(EG) | 1987(EG) | 1988(EG) | 1989(EG) | 1990(EG) | 1991(EG) | 1992(EG) | 1993(EG) | 1994(EG) | 1995(EG) | 1996(EG) | 1997(EG) | 1998(EG) | 1999(EG) | 2000(EG) | 2001(EG) | 2002(EG) | 2003(EG) | 2004(EG) | 2005(EG) | 2006(EG) | 2007(EG) | 2008(EG) | 2009(EG) | 2010(EG) | 2011(EG) | 2012(EG) | 2013(EG) | 2014(EG) | 2015(EG) | 1990(CO2) | 1991(CO2) | 1992(CO2) | 1993(CO2) | 1994(CO2) | 1995(CO2) | 1996(CO2) | 1997(CO2) | 1998(CO2) | 1999(CO2) | 2000(CO2) | 2001(CO2) | 2002(CO2) | 2003(CO2) | 2004(CO2) | 2005(CO2) | 2006(CO2) | 2007(CO2) | 2008(CO2) | 2009(CO2) | 2010(CO2) | 2011(CO2) | 2012(CO2) | 2013(CO2) | 2014(CO2) | 2015(CO2) | 2016(CO2) | 2017(CO2) | 2018(CO2) | 2019(CO2) | 2020(CO2) | 1961(GDP annual growth) | 1962(GDP annual growth) | 1963(GDP annual growth) | 1964(GDP annual growth) | 1965(GDP annual growth) | 1966(GDP annual growth) | 1967(GDP annual growth) | 1968(GDP annual growth) | 1969(GDP annual growth) | 1970(GDP annual growth) | 1971(GDP annual growth) | 1972(GDP annual growth) | 1973(GDP annual growth) | 1974(GDP annual growth) | 1975(GDP annual growth) | 1976(GDP annual growth) | 1977(GDP annual growth) | 1978(GDP annual growth) | 1979(GDP annual growth) | 1980(GDP annual growth) | 1981(GDP annual growth) | 1982(GDP annual growth) | 1983(GDP annual growth) | 1984(GDP annual growth) | 1985(GDP annual growth) | 1986(GDP annual growth) | 1987(GDP 
annual growth) | 1988(GDP annual growth) | 1989(GDP annual growth) | 1990(GDP annual growth) | 1991(GDP annual growth) | 1992(GDP annual growth) | 1993(GDP annual growth) | 1994(GDP annual growth) | 1995(GDP annual growth) | 1996(GDP annual growth) | 1997(GDP annual growth) | 1998(GDP annual growth) | 1999(GDP annual growth) | 2000(GDP annual growth) | 2001(GDP annual growth) | 2002(GDP annual growth) | 2003(GDP annual growth) | 2004(GDP annual growth) | 2005(GDP annual growth) | 2006(GDP annual growth) | 2007(GDP annual growth) | 2008(GDP annual growth) | 2009(GDP annual growth) | 2010(GDP annual growth) | 2011(GDP annual growth) | 2012(GDP annual growth) | 2013(GDP annual growth) | 2014(GDP annual growth) | 2015(GDP annual growth) | 2016(GDP annual growth) | 2017(GDP annual growth) | 2018(GDP annual growth) | 2019(GDP annual growth) | 2020(GDP annual growth) | 2021(GDP annual growth) | 2022(GDP annual growth) | 1961(GDP per capita annual growth) | 1962(GDP per capita annual growth) | 1963(GDP per capita annual growth) | 1964(GDP per capita annual growth) | 1965(GDP per capita annual growth) | 1966(GDP per capita annual growth) | 1967(GDP per capita annual growth) | 1968(GDP per capita annual growth) | 1969(GDP per capita annual growth) | 1970(GDP per capita annual growth) | 1971(GDP per capita annual growth) | 1972(GDP per capita annual growth) | 1973(GDP per capita annual growth) | 1974(GDP per capita annual growth) | 1975(GDP per capita annual growth) | 1976(GDP per capita annual growth) | 1977(GDP per capita annual growth) | 1978(GDP per capita annual growth) | 1979(GDP per capita annual growth) | 1980(GDP per capita annual growth) | 1981(GDP per capita annual growth) | 1982(GDP per capita annual growth) | 1983(GDP per capita annual growth) | 1984(GDP per capita annual growth) | 1985(GDP per capita annual growth) | 1986(GDP per capita annual growth) | 1987(GDP per capita annual growth) | 1988(GDP per capita annual growth) | 1989(GDP per capita annual growth) | 
1990(GDP per capita annual growth) | 1991(GDP per capita annual growth) | 1992(GDP per capita annual growth) | 1993(GDP per capita annual growth) | 1994(GDP per capita annual growth) | 1995(GDP per capita annual growth) | 1996(GDP per capita annual growth) | 1997(GDP per capita annual growth) | 1998(GDP per capita annual growth) | 1999(GDP per capita annual growth) | 2000(GDP per capita annual growth) | 2001(GDP per capita annual growth) | 2002(GDP per capita annual growth) | 2003(GDP per capita annual growth) | 2004(GDP per capita annual growth) | 2005(GDP per capita annual growth) | 2006(GDP per capita annual growth) | 2007(GDP per capita annual growth) | 2008(GDP per capita annual growth) | 2009(GDP per capita annual growth) | 2010(GDP per capita annual growth) | 2011(GDP per capita annual growth) | 2012(GDP per capita annual growth) | 2013(GDP per capita annual growth) | 2014(GDP per capita annual growth) | 2015(GDP per capita annual growth) | 2016(GDP per capita annual growth) | 2017(GDP per capita annual growth) | 2018(GDP per capita annual growth) | 2019(GDP per capita annual growth) | 2020(GDP per capita annual growth) | 2021(GDP per capita annual growth) | 2022(GDP per capita annual growth) | 1961(Population annual growth) | 1962(Population annual growth) | 1963(Population annual growth) | 1964(Population annual growth) | 1965(Population annual growth) | 1966(Population annual growth) | 1967(Population annual growth) | 1968(Population annual growth) | 1969(Population annual growth) | 1970(Population annual growth) | 1971(Population annual growth) | 1972(Population annual growth) | 1973(Population annual growth) | 1974(Population annual growth) | 1975(Population annual growth) | 1976(Population annual growth) | 1977(Population annual growth) | 1978(Population annual growth) | 1979(Population annual growth) | 1980(Population annual growth) | 1981(Population annual growth) | 1982(Population annual growth) | 1983(Population annual growth) | 1984(Population 
annual growth) | 1985(Population annual growth) | 1986(Population annual growth) | 1987(Population annual growth) | 1988(Population annual growth) | 1989(Population annual growth) | 1990(Population annual growth) | 1991(Population annual growth) | 1992(Population annual growth) | 1993(Population annual growth) | 1994(Population annual growth) | 1995(Population annual growth) | 1996(Population annual growth) | 1997(Population annual growth) | 1998(Population annual growth) | 1999(Population annual growth) | 2000(Population annual growth) | 2001(Population annual growth) | 2002(Population annual growth) | 2003(Population annual growth) | 2004(Population annual growth) | 2005(Population annual growth) | 2006(Population annual growth) | 2007(Population annual growth) | 2008(Population annual growth) | 2009(Population annual growth) | 2010(Population annual growth) | 2011(Population annual growth) | 2012(Population annual growth) | 2013(Population annual growth) | 2014(Population annual growth) | 2015(Population annual growth) | 2016(Population annual growth) | 2017(Population annual growth) | 2018(Population annual growth) | 2019(Population annual growth) | 2020(Population annual growth) | 2021(Population annual growth) | 2022(Population annual growth) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 
217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 | 217.000000 |
| mean | 43.489329 | 46.004659 | 49.366703 | 51.154783 | 52.703799 | 54.736168 | 56.808000 | 58.373372 | 60.011029 | 62.150138 | 73.459199 | 73.967427 | 74.499641 | 75.075785 | 75.577143 | 76.019012 | 76.923533 | 77.292335 | 77.957729 | 78.430723 | 79.280472 | 79.985660 | 80.679052 | 81.235389 | 81.966951 | 82.521371 | 83.489038 | 84.218849 | 84.816159 | 85.363756 | 85.892042 | 86.373922 | 264.062811 | 268.187152 | 279.194845 | 294.338936 | 306.932011 | 323.790227 | 332.092316 | 340.680482 | 363.175533 | 384.993621 | 415.744950 | 983.534801 | 998.586195 | 1084.752793 | 1060.843580 | 1005.934178 | 1047.392524 | 1069.846330 | 1078.613745 | 1134.234334 | 1147.868755 | 1143.944787 | 1163.379980 | 1162.370219 | 1161.320689 | 1150.505869 | 1166.325275 | 1176.640371 | 1187.572960 | 1209.759383 | 1566.965904 | 1477.378494 | 1452.199732 | 1442.304909 | 1442.652198 | 1446.310513 | 1473.570203 | 1520.727395 | 1536.756875 | 1521.720659 | 1543.271417 | 1582.943954 | 1592.941173 | 1628.766533 | 1759.271044 | 1764.109100 | 1809.071969 | 1820.333609 | 1732.555355 | 1668.810725 | 1717.704338 | 1723.356285 | 1722.442084 | 1698.565014 | 1662.182533 | 655.161540 | 93763.963016 | 94412.362824 | 94544.351199 | 95179.136692 | 95719.473249 | 98425.445838 | 100517.519718 | 102283.502277 | 102835.554812 | 103348.794384 | 106586.336120 | 108390.675175 | 109829.542627 | 115001.552419 | 120130.946714 | 124657.803477 | 128696.428972 | 133560.507772 | 134564.578185 | 133035.786773 | 141209.216939 | 145663.353466 | 147631.793974 | 150633.028820 | 151086.416285 | 149964.836008 | 150105.072757 | 152336.727186 | 156245.920083 | 155930.990907 | 148943.605131 | 1.671458 | 2.313185 | 2.186088 | 2.671147 | 2.357393 | 2.194625 | 2.259751 | 3.049710 | 3.148447 | 3.508088 | 2.771929 | 2.991690 | 2.606149 | 2.980520 | 1.703452 | 3.443774 | 2.684339 | 2.860275 | 2.507445 | 1.868443 | 2.133314 | 1.003334 | 1.077603 | 2.224569 | 2.106418 | 2.624988 | 2.743790 | 3.445901 | 2.263204 | 2.539595 | 1.230054 | 
1.246148 | 1.618922 | 1.773452 | 3.298974 | 4.241317 | 4.486338 | 3.192266 | 2.876547 | 3.788030 | 3.033952 | 2.920878 | 3.592008 | 5.431132 | 4.595557 | 5.236552 | 5.241805 | 3.692486 | -0.200104 | 3.934932 | 3.368764 | 3.018480 | 2.820088 | 3.050597 | 2.499586 | 2.938269 | 3.023440 | 2.897285 | 2.702009 | -4.996384 | 5.198718 | 3.830875 | 0.681620 | 1.244591 | 1.131709 | 1.606476 | 1.312223 | 1.107224 | 1.132206 | 1.920430 | 1.995911 | 2.354107 | 1.553891 | 1.806080 | 1.445676 | 1.789197 | 0.525972 | 2.185883 | 1.438262 | 1.604518 | 1.209651 | 0.568230 | 0.721811 | -0.421335 | -0.354279 | 0.813634 | 0.694131 | 1.188073 | 1.247861 | 1.960190 | 0.787534 | 1.047772 | -0.291973 | -0.188821 | 0.235795 | 0.457040 | 2.027233 | 2.758220 | 3.086517 | 1.904129 | 1.619351 | 2.524403 | 1.771119 | 1.630578 | 2.292065 | 4.091092 | 3.225777 | 3.752001 | 3.755220 | 2.237681 | -1.581172 | 2.594070 | 2.162453 | 1.785126 | 1.452135 | 1.713475 | 1.226397 | 1.705699 | 1.843871 | 1.722501 | 1.569738 | -5.915502 | 4.324804 | 2.894463 | 2.264454 | 2.330707 | 2.380858 | 2.316108 | 2.304132 | 2.258272 | 2.253934 | 2.277120 | 2.239347 | 2.216607 | 2.241673 | 2.174686 | 2.158036 | 2.139245 | 2.094872 | 2.099519 | 2.052053 | 1.981784 | 1.979243 | 2.037863 | 2.021840 | 2.087415 | 2.152887 | 2.114348 | 2.069623 | 2.052115 | 2.050109 | 1.963819 | 1.933231 | 1.699442 | 1.601467 | 1.732981 | 1.623664 | 1.500728 | 1.484108 | 1.668160 | 1.624936 | 1.521329 | 1.428115 | 1.406551 | 1.376862 | 1.421472 | 1.411257 | 1.413058 | 1.449045 | 1.545530 | 1.542833 | 1.517732 | 1.447629 | 1.355594 | 1.264662 | 1.322436 | 1.377639 | 1.345282 | 1.287322 | 1.229685 | 1.151059 | 1.138190 | 1.121918 | 1.008800 | 0.848505 | 0.878030 |
| std | 48.907388 | 48.315766 | 47.709248 | 46.885191 | 46.489787 | 45.846403 | 44.945891 | 44.609569 | 43.794481 | 43.014116 | 35.085949 | 34.850426 | 34.379763 | 33.818423 | 33.393834 | 33.104228 | 32.642630 | 32.029149 | 31.717618 | 31.306659 | 30.792498 | 29.998803 | 29.445194 | 29.183726 | 28.635353 | 28.115342 | 27.357172 | 26.767866 | 25.960180 | 25.744857 | 25.387192 | 24.905878 | 1017.679239 | 1022.648403 | 1041.560881 | 1077.213818 | 1127.745938 | 1147.256662 | 1153.432518 | 1170.355261 | 1246.461739 | 1320.940568 | 1395.471565 | 2932.233999 | 2895.053095 | 3227.942399 | 3029.714281 | 2396.686780 | 2540.925189 | 2539.995545 | 2408.904202 | 2523.249021 | 2585.256790 | 2545.302704 | 2679.737891 | 2725.267926 | 2437.883652 | 2233.385593 | 2246.897849 | 2229.904505 | 2248.533890 | 2305.430463 | 2294.768874 | 2327.168681 | 2303.400847 | 2301.261097 | 2329.112190 | 2320.956962 | 2360.679480 | 2575.407326 | 2655.723558 | 2564.256840 | 2565.563006 | 2667.870568 | 2703.126784 | 2716.643885 | 2801.387154 | 2737.396592 | 2782.036422 | 2813.705496 | 2816.081686 | 2738.272900 | 2785.021740 | 2873.683791 | 2866.302772 | 2815.857667 | 2823.662664 | 1879.400593 | 405593.962471 | 406328.762106 | 410280.953519 | 419132.887899 | 423505.745749 | 435796.933877 | 443974.682287 | 459022.725143 | 464325.040883 | 464160.142557 | 480652.754670 | 485351.058630 | 486842.827746 | 514800.828696 | 547221.078060 | 579875.902009 | 605453.063262 | 637727.395130 | 641295.333073 | 652697.712970 | 704684.966140 | 744834.482045 | 754886.210006 | 784670.534192 | 788926.877865 | 775937.949405 | 773434.615661 | 786323.727832 | 820818.216035 | 827715.061928 | 821874.441016 | 4.406033 | 4.345739 | 4.609193 | 4.524325 | 4.194551 | 4.081920 | 5.872687 | 6.699031 | 4.779952 | 6.387050 | 4.554912 | 5.477454 | 5.358358 | 6.081975 | 5.266602 | 6.016418 | 4.763407 | 5.248625 | 5.854376 | 5.675049 | 4.961015 | 4.510804 | 4.608077 | 4.094936 | 4.125180 | 4.425512 | 4.651521 | 5.397259 | 5.215615 | 6.515361 
| 7.600735 | 8.069489 | 6.779419 | 7.250956 | 5.138925 | 8.256008 | 11.065553 | 5.099460 | 4.358260 | 4.049576 | 5.532003 | 4.494740 | 5.606206 | 5.998920 | 4.421342 | 4.708579 | 4.241795 | 6.051326 | 5.655943 | 4.457708 | 5.644571 | 8.325560 | 5.044485 | 3.694823 | 4.689391 | 3.738093 | 4.102022 | 3.127704 | 3.525398 | 8.429749 | 6.070269 | 6.427968 | 3.958586 | 3.544250 | 4.008325 | 3.760477 | 3.517736 | 3.446455 | 5.394319 | 6.090062 | 4.098207 | 5.518800 | 3.841689 | 4.842087 | 4.797148 | 5.466268 | 4.915792 | 5.278440 | 4.234676 | 4.968219 | 5.413943 | 5.198012 | 4.585359 | 4.358066 | 4.462551 | 3.966861 | 4.068728 | 4.331611 | 4.534027 | 5.184153 | 5.108612 | 6.179083 | 7.263615 | 7.640094 | 6.339078 | 6.764802 | 5.976519 | 7.686015 | 10.423881 | 4.963977 | 4.177112 | 4.042049 | 5.313032 | 4.396537 | 5.468723 | 5.555608 | 4.215258 | 4.423173 | 4.272250 | 5.943267 | 5.123298 | 4.107765 | 5.375299 | 8.569161 | 4.674218 | 3.422074 | 4.693460 | 3.790104 | 4.138828 | 3.020802 | 3.473157 | 8.273661 | 6.258276 | 6.279185 | 1.468415 | 1.502761 | 1.663652 | 1.501529 | 1.548015 | 1.551840 | 1.586755 | 1.854218 | 1.980709 | 1.861918 | 1.901313 | 1.777686 | 1.737723 | 1.578829 | 1.617631 | 1.903800 | 1.913568 | 1.846858 | 1.894073 | 1.645424 | 1.847492 | 1.942834 | 2.045677 | 1.934482 | 1.821101 | 1.766101 | 1.719653 | 1.676138 | 1.683553 | 2.729484 | 2.504301 | 2.171485 | 1.881062 | 1.982094 | 1.906994 | 1.752106 | 1.749015 | 1.647788 | 1.672460 | 1.370743 | 1.392488 | 1.378254 | 1.392322 | 1.359771 | 1.513138 | 2.018527 | 2.251149 | 2.054745 | 1.763262 | 1.515568 | 1.616468 | 1.529739 | 1.618706 | 1.709519 | 1.496230 | 1.363899 | 1.319466 | 1.332566 | 1.262967 | 1.235314 | 1.342774 | 1.648741 |
| min | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -27.270000 | -19.685042 | -12.278657 | -12.464993 | -12.481834 | -7.659066 | -15.743628 | -5.474906 | -6.554143 | -5.649650 | -11.331719 | -13.973729 | -17.047584 | -18.266379 | -14.803126 | -26.768233 | -12.582091 | -24.049206 | -26.478789 | -21.599649 | -19.826716 | -20.729888 | -16.051386 | -16.819835 | -11.144353 | -14.958137 | -17.146042 | -13.379844 | -42.451118 | -14.788226 | -64.047107 | -44.899999 | -29.300002 | -50.248067 | -12.416322 | -16.699998 | -14.115381 | -28.099980 | -9.424162 | -14.277000 | -9.310638 | -12.489192 | -36.658153 | -5.807538 | -12.702893 | -8.510638 | -6.976744 | -17.668946 | -17.573222 | -8.924176 | -50.338515 | -46.082122 | -36.391977 | 
-23.042806 | -27.994546 | -9.375124 | -8.000000 | -19.268850 | -11.320755 | -54.335876 | -20.738839 | -29.100000 | -26.527644 | -20.910074 | -14.521498 | -14.404949 | -15.115017 | -10.550791 | -17.500310 | -6.758929 | -9.268812 | -9.012213 | -13.307118 | -15.177659 | -19.229269 | -20.316483 | -16.539297 | -26.325029 | -14.628880 | -25.884051 | -28.440545 | -24.277368 | -22.378561 | -24.430735 | -19.945535 | -19.291714 | -13.852511 | -20.120820 | -19.405123 | -15.246304 | -43.566902 | -14.765157 | -64.425841 | -45.325107 | -29.841290 | -41.586861 | -14.077047 | -18.348264 | -13.590923 | -29.413291 | -11.202914 | -16.419985 | -11.610002 | -14.964033 | -38.561720 | -6.466319 | -12.635127 | -8.297335 | -13.923387 | -18.323503 | -17.145394 | -13.154428 | -47.899986 | -48.392454 | -36.777697 | -24.463161 | -29.921761 | -12.202835 | -9.033697 | -18.586782 | -12.429867 | -55.188681 | -22.966942 | -22.419285 | -1.015528 | -1.510091 | -1.845309 | -2.110700 | -2.354033 | -2.596081 | -2.829547 | -3.085539 | -4.787105 | -2.562076 | -1.591048 | -2.135137 | -2.423670 | -2.545805 | -2.719639 | -6.458686 | -4.324284 | -8.262824 | -9.249471 | -3.924172 | -11.275324 | -10.053006 | -1.365708 | -0.517770 | -0.558423 | -0.608561 | -1.210190 | -1.626002 | -2.273961 | -27.722225 | -22.347974 | -5.964186 | -8.101781 | -16.049153 | -16.880630 | -3.629546 | -3.207518 | -5.924733 | -10.955149 | -4.075386 | -3.847671 | -2.958573 | -3.345834 | -3.720032 | -4.077969 | -4.390534 | -4.663536 | -4.905031 | -5.084451 | -5.343938 | -4.793554 | -5.280078 | -5.033810 | -6.852118 | -4.415744 | -2.217280 | -3.755484 | -4.048391 | -2.904996 | -2.984077 | -4.256649 | -14.257037 |
| 25% | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 3.826888 | 7.325682 | 43.700000 | 46.200000 | 47.000000 | 51.516457 | 51.871391 | 55.227303 | 55.800335 | 56.910419 | 59.122952 | 59.568451 | 63.167737 | 64.062560 | 65.746750 | 69.413651 | 71.201126 | 72.984283 | 75.920000 | 79.200000 | 80.700829 | 83.500000 | 85.635345 | 86.291473 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 128.303790 | 137.847375 | 139.061470 | 153.276840 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 194.500000 | 221.400000 | 282.610000 | 252.100000 | 278.300000 | 292.900000 | 310.700000 | 338.300000 | 377.420000 | 378.530000 | 388.840000 | 442.500000 | 443.760000 | 450.200000 | 416.900000 | 446.500000 | 518.700000 | 491.600000 | 474.300000 | 510.600000 | 519.100000 | 573.400000 | 525.600000 | 558.790000 | 506.800000 | 585.900000 | 593.200000 | 602.300000 | 656.100000 | 697.200000 | 687.600000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.185104 | 1.014396 | 0.068038 | 0.000000 | 1.127907 | 0.213333 | 0.257180 | 1.037983 | 2.570134 | 2.228270 | 2.646955 | 2.620331 | 0.446929 | -3.764578 | 1.395939 | 1.107544 | 0.000000 | 0.632137 | 
0.984469 | 0.967703 | 1.164724 | 1.317188 | 1.393176 | 0.818526 | -7.816951 | 2.111831 | 1.700000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -0.061308 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -1.740252 | -1.057654 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -0.798462 | -1.834742 | -1.736630 | -1.319461 | -0.007442 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -0.109438 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.197055 | 0.788917 | 1.103829 | 0.975922 | 0.000000 | -4.611748 | 0.121592 | 0.025024 | -0.551991 | 0.000000 | 0.174328 | -0.024726 | 0.010796 | 0.077665 | 0.182368 | 0.000000 | -8.672432 | 1.013321 | 0.000000 | 1.402060 | 1.366899 | 1.378477 | 1.382205 | 1.359458 | 1.241618 | 1.223654 | 1.226726 | 1.110437 | 1.191297 | 1.205085 | 1.122448 | 1.066259 | 1.067901 | 1.063726 | 1.002658 | 0.963867 | 0.850876 | 0.809721 | 0.862384 | 0.776216 | 0.894085 | 0.894335 | 0.856055 | 0.811017 | 0.732589 | 0.810550 | 0.648304 | 0.765915 | 0.712313 | 0.681643 | 0.650644 | 0.588262 | 0.447447 | 0.492793 | 0.535571 | 0.514767 | 0.574789 | 0.516384 | 0.565925 | 0.468037 | 0.458574 | 0.433075 | 0.432788 | 0.463308 | 0.476921 | 0.455526 | 0.512387 | 0.478246 | 0.425941 | 0.355338 | 0.376272 | 0.416901 | 0.359828 | 0.354975 | 0.287421 | 0.234671 | 0.323321 | 0.290665 | 0.143641 | 0.105799 | 0.185163 |
| 50% | 0.000000 | 5.600000 | 49.200000 | 61.384029 | 69.253830 | 74.915398 | 77.492043 | 82.029404 | 85.040000 | 88.518177 | 95.416351 | 96.276352 | 96.100000 | 96.297951 | 96.615067 | 96.725266 | 97.773399 | 97.224396 | 97.942963 | 98.296799 | 98.691376 | 98.840000 | 99.100000 | 99.145050 | 99.274551 | 99.409804 | 99.654099 | 99.851913 | 99.820000 | 99.900000 | 99.990000 | 100.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 188.465042 | 197.934041 | 213.718746 | 226.062858 | 252.530536 | 259.179567 | 262.447866 | 267.046341 | 273.907471 | 273.651797 | 268.733676 | 272.461128 | 276.230740 | 277.953645 | 290.501090 | 279.205331 | 281.783690 | 278.999338 | 276.995339 | 531.569298 | 487.880042 | 468.394735 | 465.482265 | 465.158384 | 474.463701 | 474.417013 | 491.812232 | 486.732273 | 486.344819 | 505.936912 | 480.887218 | 500.298345 | 511.252355 | 683.277637 | 722.945627 | 729.808628 | 741.280222 | 574.121208 | 583.805983 | 625.268921 | 608.866553 | 656.711173 | 631.919735 | 554.226022 | 0.000000 | 3179.000000 | 3193.420000 | 3585.400000 | 3795.200000 | 3252.100000 | 3781.200000 | 4053.600000 | 4493.900000 | 4767.340000 | 4745.200000 | 4772.800000 | 4708.800000 | 4620.100000 | 5206.000000 | 5459.900000 | 5609.900000 | 5584.400000 | 6131.500000 | 6474.700000 | 6666.200000 | 6539.200000 | 7430.800000 | 7299.500000 | 7402.300000 | 7282.300000 | 7537.900000 | 8143.000000 | 8173.500000 | 8589.000000 | 8965.700000 | 8312.500000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.041547 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.287558 | 1.420933 | 1.867612 | 1.682282 | 2.655262 | 2.150267 | 0.999993 | 1.015128 | 0.929214 | 1.233558 | 2.400691 | 3.030863 | 3.667180 | 3.966651 | 3.477834 | 2.902214 | 3.868718 | 2.626010 | 3.014283 | 3.747398 
| 4.784467 | 4.310239 | 5.244100 | 5.168231 | 3.436374 | 0.000000 | 3.731140 | 3.421809 | 2.690350 | 3.004823 | 3.090328 | 2.767676 | 2.953509 | 3.079851 | 2.918307 | 2.500000 | -3.739562 | 4.687478 | 3.674517 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.410835 | 0.410783 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.768829 | 1.758333 | 1.994095 | 2.100125 | 1.781324 | 1.339021 | 2.689647 | 1.286508 | 1.313732 | 2.514866 | 3.505743 | 2.877489 | 3.501312 | 3.621787 | 1.838240 | -1.256816 | 2.485985 | 2.402779 | 1.348536 | 1.538974 | 1.765941 | 1.622166 | 1.812607 | 1.860147 | 1.876401 | 1.568211 | -4.792468 | 3.752521 | 2.386469 | 2.213284 | 2.311689 | 2.315196 | 2.381616 | 2.382829 | 2.311738 | 2.302016 | 2.268876 | 2.263434 | 2.234855 | 2.200844 | 2.157027 | 2.196357 | 2.198373 | 2.172281 | 2.091391 | 2.032555 | 1.964645 | 1.957311 | 2.047964 | 2.002974 | 2.075682 | 2.092556 | 2.120070 | 2.075109 | 2.059502 | 2.022787 | 2.016939 | 1.965285 | 1.896626 | 1.900942 | 1.775504 | 1.671828 | 1.569307 | 1.589701 | 1.660946 | 1.578534 | 1.542806 | 1.431673 | 1.383747 | 1.343320 | 1.277639 | 1.254593 | 1.269573 | 1.319053 | 1.294402 | 1.233193 | 1.270618 | 1.249485 | 1.251517 | 1.245454 | 1.243571 | 1.159251 | 1.132158 | 1.120928 | 1.121608 | 1.088676 | 1.140549 | 1.074975 | 0.970386 | 0.844182 | 0.884624 |
| 75% | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 99.982925 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 799.749504 | 865.592481 | 841.332828 | 823.743652 | 826.952081 | 876.299690 | 867.229471 | 912.906025 | 923.227200 | 988.003660 | 934.157558 | 1040.545443 | 972.687917 | 1051.353989 | 1029.125832 | 1121.922084 | 1101.294740 | 1121.570432 | 1235.014149 | 2280.882812 | 2201.633412 | 2110.495548 | 2049.448756 | 1947.807360 | 1879.094587 | 1986.836971 | 2057.432496 | 2077.574238 | 2081.749959 | 2039.054164 | 2109.680384 | 2119.782080 | 2223.783121 | 2252.855716 | 2261.483778 | 2314.441463 | 2402.426395 | 2337.271387 | 2295.343190 | 2222.629692 | 2211.807053 | 2149.602569 | 2204.243299 | 2176.843306 | 0.000000 | 40868.900000 | 37788.500000 | 36857.300000 | 40504.600000 | 38651.800000 | 42370.600000 | 44006.000000 | 42273.500000 | 44630.800000 | 44322.000000 | 44401.100000 | 46185.600000 | 45596.100000 | 48931.500000 | 49365.200000 | 50592.500000 | 52930.400000 | 52598.700000 | 53014.200000 | 48765.700000 | 50937.300000 | 51230.200000 | 48220.300000 | 49751.900000 | 46161.700000 | 49851.900000 | 48694.800000 | 53272.700000 | 50121.300000 | 54692.700000 | 46324.300000 | 3.844734 | 4.482859 | 4.405974 | 5.459057 | 4.760000 | 4.441159 | 4.313622 | 5.093751 | 5.574674 | 5.598515 | 4.698992 | 5.643553 | 5.896580 | 5.533804 | 2.923532 | 6.080396 | 5.100590 | 6.208864 | 4.939124 | 4.564243 | 4.814820 | 2.634297 | 3.491525 | 4.792699 | 4.241336 | 4.776564 | 4.764533 | 5.996855 | 4.777585 | 4.458587 | 4.291342 | 4.758581 | 4.850001 | 5.100000 | 
5.911908 | 6.047268 | 5.925837 | 4.993528 | 4.699227 | 5.798782 | 4.805453 | 4.848013 | 6.353137 | 7.032395 | 6.699999 | 7.414121 | 7.415952 | 6.225894 | 3.368975 | 6.402565 | 6.039008 | 5.456389 | 5.227704 | 5.066422 | 4.489282 | 4.750317 | 4.716465 | 4.774417 | 4.665678 | -0.819785 | 7.403368 | 5.648325 | 1.496392 | 2.021470 | 2.557472 | 3.291864 | 2.490604 | 2.240466 | 2.124083 | 3.530467 | 3.405688 | 3.180126 | 2.684019 | 3.612610 | 3.979082 | 3.172396 | 1.186000 | 4.446607 | 3.233480 | 4.198675 | 3.162182 | 2.451575 | 2.680776 | 1.141571 | 1.898275 | 3.019292 | 2.682174 | 2.705521 | 2.978870 | 4.188665 | 3.259730 | 3.095333 | 2.285325 | 3.138686 | 3.150491 | 3.550976 | 3.819876 | 4.461776 | 4.372417 | 4.068483 | 3.688198 | 4.293374 | 3.214135 | 3.545381 | 4.642978 | 5.524827 | 5.180369 | 5.886420 | 6.067421 | 4.513711 | 1.392178 | 4.601538 | 4.735771 | 3.780489 | 3.487417 | 3.653587 | 3.508054 | 3.383651 | 3.745446 | 3.624263 | 3.354972 | -1.953084 | 6.500471 | 4.831341 | 2.958751 | 2.968333 | 2.963649 | 2.945226 | 2.955650 | 2.923062 | 2.917223 | 2.912352 | 2.858207 | 2.824324 | 2.789148 | 2.731882 | 2.781958 | 2.823886 | 2.906562 | 2.878888 | 2.845966 | 2.858475 | 2.831005 | 2.805115 | 2.816197 | 2.823571 | 2.801671 | 2.833689 | 2.853199 | 2.911396 | 2.921521 | 2.869367 | 2.797643 | 2.816912 | 2.695093 | 2.699827 | 2.596347 | 2.520597 | 2.501899 | 2.465731 | 2.351452 | 2.330204 | 2.388501 | 2.404784 | 2.402401 | 2.414982 | 2.310405 | 2.372008 | 2.424600 | 2.359211 | 2.293356 | 2.299710 | 2.342411 | 2.351856 | 2.207643 | 2.179882 | 2.238167 | 2.194277 | 2.200322 | 2.155312 | 2.079223 | 2.020537 | 1.950856 | 1.897015 | 1.796101 | 1.893246 |
| max | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 10523.406690 | 10534.018210 | 10414.540920 | 10465.813020 | 11150.050340 | 10926.395170 | 10480.606180 | 10361.972570 | 11114.667260 | 11974.192590 | 12106.152940 | 37113.339530 | 36146.699610 | 40710.112180 | 37557.340360 | 25763.473230 | 28353.128200 | 28090.673700 | 25027.221060 | 25303.041580 | 26517.451250 | 25001.641670 | 27354.021630 | 28902.850640 | 19859.938130 | 15434.159310 | 15688.816930 | 15567.239820 | 15695.517560 | 15525.980570 | 14776.657040 | 15820.493950 | 16022.739570 | 16120.991850 | 15708.122700 | 15819.141850 | 16337.335740 | 18479.284990 | 18814.372340 | 18454.004720 | 16906.150290 | 17777.159560 | 19140.337860 | 18879.894460 | 21420.628500 | 19635.424350 | 19300.107380 | 17969.106890 | 16353.831270 | 16911.079780 | 17023.166820 | 18157.598100 | 19987.584350 | 19819.982890 | 19903.475110 | 17478.893040 | 4844517.400000 | 4807496.500000 | 4879626.100000 | 4995209.500000 | 5066803.000000 | 5117036.900000 | 5273486.300000 | 5543349.400000 | 5590536.300000 | 5609017.300000 | 5775807.200000 | 5748261.800000 | 5593024.400000 | 5658992.000000 | 5738286.000000 | 5824625.100000 | 6437470.300000 | 6993182.800000 | 7199604.700000 | 7719071.400000 | 8474922.700000 | 9282553.700000 | 9540539.700000 | 9979128.000000 | 10021043.400000 | 9859281.200000 | 9860914.000000 | 10089273.200000 | 10567262.000000 | 10762824.000000 | 10944686.200000 | 14.766843 | 24.521297 | 34.313729 | 27.690886 | 17.035712 | 19.200000 | 66.219084 | 81.887797 | 25.666169 | 58.647331 | 25.821107 | 27.423969 | 26.404881 | 45.302754 | 30.073699 | 35.625338 | 23.307491 | 
22.003005 | 26.139296 | 24.753727 | 19.688334 | 23.597700 | 16.666671 | 16.711594 | 21.792774 | 24.542499 | 29.054324 | 34.600001 | 14.190637 | 57.817828 | 49.447379 | 34.745320 | 33.990468 | 21.221411 | 35.224078 | 88.957666 | 149.972963 | 34.857095 | 25.664015 | 19.681269 | 63.379875 | 26.524135 | 17.326020 | 53.381794 | 27.961538 | 34.500000 | 25.000000 | 65.000000 | 21.390528 | 25.122595 | 21.616648 | 86.826748 | 21.079014 | 15.217391 | 24.475253 | 29.212121 | 32.491802 | 8.668006 | 23.412017 | 43.479652 | 37.687192 | 63.368439 | 13.443330 | 20.871865 | 32.168302 | 24.077314 | 14.197889 | 15.226092 | 62.343394 | 77.511102 | 22.548707 | 51.857146 | 23.343793 | 24.516490 | 22.131876 | 44.456487 | 26.921565 | 32.563746 | 20.100419 | 20.442132 | 23.604162 | 20.920310 | 19.208867 | 20.940746 | 11.717021 | 13.690020 | 20.639619 | 19.733137 | 25.319721 | 32.009167 | 11.671573 | 55.889245 | 46.470687 | 29.855792 | 31.315593 | 17.596283 | 60.090541 | 81.355165 | 140.480042 | 30.629377 | 20.811297 | 18.911492 | 55.590330 | 19.557657 | 14.773836 | 49.031637 | 26.660095 | 33.030488 | 23.590685 | 65.386605 | 17.143534 | 21.917391 | 18.697438 | 96.956420 | 18.014653 | 12.626580 | 23.304693 | 30.174901 | 30.496301 | 8.411602 | 21.619974 | 43.758799 | 35.833873 | 62.528291 | 10.638254 | 11.774148 | 12.851885 | 12.147917 | 11.964503 | 11.988676 | 12.114861 | 12.612111 | 17.039974 | 16.295475 | 14.475650 | 13.006835 | 11.808203 | 10.810504 | 9.944741 | 13.168342 | 14.055088 | 13.029515 | 12.406977 | 10.996347 | 12.218296 | 12.920716 | 20.473239 | 19.052111 | 16.686319 | 14.873595 | 13.420984 | 12.171292 | 11.047226 | 10.219241 | 8.522739 | 19.052040 | 14.964455 | 9.864262 | 6.046536 | 16.625502 | 13.247067 | 11.897899 | 10.699857 | 5.580387 | 5.785413 | 6.449321 | 7.541019 | 6.851939 | 8.706429 | 17.898545 | 19.360429 | 17.399086 | 13.422921 | 6.611552 | 10.684073 | 9.758169 | 9.226496 | 11.794016 | 9.219918 | 7.212802 | 4.394554 | 4.556082 | 3.931356 | 3.727101 | 3.707424 | 
3.712988 |
# Collect the two column groups used throughout the analysis:
# yearly CO2 emissions and yearly GDP-per-capita growth.
co2_cols = [c for c in df.columns if 'CO2' in c]
gdp_per_capita_cols = [c for c in df.columns if 'GDP per capita' in c]
# Echo both lists so the year coverage of each indicator is visible.
print(co2_cols)
print(gdp_per_capita_cols)
['1990(CO2)', '1991(CO2)', '1992(CO2)', '1993(CO2)', '1994(CO2)', '1995(CO2)', '1996(CO2)', '1997(CO2)', '1998(CO2)', '1999(CO2)', '2000(CO2)', '2001(CO2)', '2002(CO2)', '2003(CO2)', '2004(CO2)', '2005(CO2)', '2006(CO2)', '2007(CO2)', '2008(CO2)', '2009(CO2)', '2010(CO2)', '2011(CO2)', '2012(CO2)', '2013(CO2)', '2014(CO2)', '2015(CO2)', '2016(CO2)', '2017(CO2)', '2018(CO2)', '2019(CO2)', '2020(CO2)'] ['1961(GDP per capita annual growth)', '1962(GDP per capita annual growth)', '1963(GDP per capita annual growth)', '1964(GDP per capita annual growth)', '1965(GDP per capita annual growth)', '1966(GDP per capita annual growth)', '1967(GDP per capita annual growth)', '1968(GDP per capita annual growth)', '1969(GDP per capita annual growth)', '1970(GDP per capita annual growth)', '1971(GDP per capita annual growth)', '1972(GDP per capita annual growth)', '1973(GDP per capita annual growth)', '1974(GDP per capita annual growth)', '1975(GDP per capita annual growth)', '1976(GDP per capita annual growth)', '1977(GDP per capita annual growth)', '1978(GDP per capita annual growth)', '1979(GDP per capita annual growth)', '1980(GDP per capita annual growth)', '1981(GDP per capita annual growth)', '1982(GDP per capita annual growth)', '1983(GDP per capita annual growth)', '1984(GDP per capita annual growth)', '1985(GDP per capita annual growth)', '1986(GDP per capita annual growth)', '1987(GDP per capita annual growth)', '1988(GDP per capita annual growth)', '1989(GDP per capita annual growth)', '1990(GDP per capita annual growth)', '1991(GDP per capita annual growth)', '1992(GDP per capita annual growth)', '1993(GDP per capita annual growth)', '1994(GDP per capita annual growth)', '1995(GDP per capita annual growth)', '1996(GDP per capita annual growth)', '1997(GDP per capita annual growth)', '1998(GDP per capita annual growth)', '1999(GDP per capita annual growth)', '2000(GDP per capita annual growth)', '2001(GDP per capita annual growth)', '2002(GDP per capita annual 
growth)', '2003(GDP per capita annual growth)', '2004(GDP per capita annual growth)', '2005(GDP per capita annual growth)', '2006(GDP per capita annual growth)', '2007(GDP per capita annual growth)', '2008(GDP per capita annual growth)', '2009(GDP per capita annual growth)', '2010(GDP per capita annual growth)', '2011(GDP per capita annual growth)', '2012(GDP per capita annual growth)', '2013(GDP per capita annual growth)', '2014(GDP per capita annual growth)', '2015(GDP per capita annual growth)', '2016(GDP per capita annual growth)', '2017(GDP per capita annual growth)', '2018(GDP per capita annual growth)', '2019(GDP per capita annual growth)', '2020(GDP per capita annual growth)', '2021(GDP per capita annual growth)', '2022(GDP per capita annual growth)']
# Line chart with one trace per CO2 year-column.
# BUG FIX: the original used x=df.columns[4:] (hundreds of column labels)
# against y=df[col] (one value per country), so x and y had different
# lengths and unrelated meanings. Each y-value belongs to a country, so
# the countries are the correct x-axis.
fig = go.Figure()
for col in co2_cols:
    fig.add_trace(go.Scatter(x=df['Country Name'], y=df[col], mode='lines', name=col))
# Shared layout tweaks for readability.
fig.update_layout(
    title='CO2 Emissions Over Time',
    xaxis_title='Country',
    yaxis_title='CO2 emissions (kt)',
    hovermode='x',  # Display closest data point on hover
    height=600,  # Adjust height for better visibility
    width=1000  # Adjust width for better visibility
)
# Show plot
fig.show()
# Find the most recent GDP-per-capita growth column, e.g. "2022(GDP per capita annual growth)".
gdp_per_capita_columns = [col for col in df.columns if 'GDP per capita' in col]
latest_year = max(gdp_per_capita_columns, key=lambda name: int(name.split('(')[0]))
# Derive a display column rounded to three decimals for the bar labels.
if latest_year in df.columns:
    df['latest_gdp_per_capita'] = df[latest_year].apply(lambda v: round(v, 3))
else:
    print(f"The column {latest_year} does not exist.")
# Bar chart of the ten countries with the highest latest GDP per capita.
fig = px.bar(
    df.nlargest(10, 'latest_gdp_per_capita'),
    x='Country Name',
    y='latest_gdp_per_capita',
    labels={'Country Name': 'Country', 'latest_gdp_per_capita': 'GDP per capita'},
    title=f'Top 10 Countries by GDP per Capita in {latest_year.split("(")[0]}',
    text='latest_gdp_per_capita',
)
# Angle the country names so they do not overlap.
fig.update_layout(
    xaxis_tickangle=-45,
    xaxis_title='Country',
    yaxis_title='GDP per capita',
    height=600,
    width=1000,
)
fig.show()
# Find the most recent CO2 column, e.g. "2020(CO2)".
co2_columns = [col for col in df.columns if 'CO2' in col]
latest_year = max(co2_columns, key=lambda name: int(name.split('(')[0]))
# Derive a display column rounded to two decimals for the bar labels.
if latest_year in df.columns:
    df['latest_co2'] = df[latest_year].apply(lambda v: round(v, 2))
else:
    print(f"The column {latest_year} does not exist.")
# Bar chart of the ten highest-emitting countries in the latest year.
fig = px.bar(
    df.nlargest(10, 'latest_co2'),
    x='Country Name',
    y='latest_co2',
    labels={'Country Name': 'Country', 'latest_co2': 'CO2 Emissions'},
    title=f'Top 10 Countries by CO2 Emissions in {latest_year.split("(")[0]}',
    text='latest_co2',
)
# Angle the country names so they do not overlap.
fig.update_layout(
    xaxis_tickangle=-45,
    xaxis_title='Country',
    yaxis_title='CO2 Emissions',
    height=600,
    width=1000,
)
fig.show()
# Scatter of GDP-per-capita growth vs CO2 emissions for a fixed year.
latest_year = '2020'
fig = px.scatter(df, x=f'{latest_year}(GDP per capita annual growth)', y=f'{latest_year}(CO2)',
                 labels={f'{latest_year}(GDP per capita annual growth)': f'GDP per capita annual growth in {latest_year}',
                         f'{latest_year}(CO2)': f'CO2 emissions in {latest_year}'},
                 # BUG FIX: original title was the garbled f'ae{latest_year}';
                 # replaced with a descriptive title.
                 title=f'CO2 Emissions vs GDP per Capita Annual Growth in {latest_year}')
# Customize layout
fig.update_layout(xaxis_title=f'GDP per capita annual growth in {latest_year}',
                  yaxis_title=f'CO2 emissions in {latest_year}',
                  height=600,
                  width=1000)
# Show the plot
fig.show()
# Total 2020 CO2 emissions per region, shown as a bar chart.
# Group by region and sum CO2 emissions once.
df_region_co2 = df.groupby('Region')['2020(CO2)'].sum().reset_index()
regions = df_region_co2['Region']
co2_emissions = df_region_co2['2020(CO2)']
# PERF FIX: the original looped over the regions and re-filtered the full
# DataFrame to recompute each sum — values already present in
# df_region_co2. Reuse the grouped series directly.
data = co2_emissions.tolist()
# Create the bar chart (barmode kept for layout parity).
fig = go.Figure(data=[go.Bar(x=regions, y=data)])
fig.update_layout(
    barmode='stack',
    title='CO2 Emissions by Region (2020)',
    xaxis_title='Region',
    yaxis_title='CO2 Emissions (metric tons)'
)
# Show the plot
fig.show()
# Sample dictionary of hypothetical records (kept for reference; the plot
# below iterates the full df, not this dict).
data = {
    'Country Name': ['Bangladesh', 'United States', 'Canada', 'Australia'],
    'Region': ['South Asia', 'North America', 'North America', 'East Asia & Pacific'],
    'IncomeGroup': ['Lower middle income', 'High income', 'High income', 'High income'],
    '2022(Population annual growth)': [2.0, 1.5, 0.9, 0.5]
}
# Categorical columns plotted along the x-axis, one trace per country.
columns_to_plot = ['Country Name', 'Region', 'IncomeGroup']
fig = go.Figure()
for _, row in df.iterrows():
    trace = go.Scatter(
        x=columns_to_plot,
        y=row[columns_to_plot],
        mode='lines+markers',
        name=row['Country Name'],
    )
    fig.add_trace(trace)
# Enlarge the canvas and angle the tick labels for readability.
fig.update_layout(
    title='Area Plot of Hypothetical Data',
    xaxis_title='Category',
    yaxis_title='Value',
    xaxis=dict(tickangle=-45),
    legend=dict(x=0, y=1.0),
    margin=dict(l=50, r=50, t=50, b=50),
    height=800,
    width=1000,
)
fig.show()
# Silence pandas' FutureWarning chatter (e.g. about iteritems) before plotting.
warnings.filterwarnings("ignore", category=FutureWarning)
# 2015 indicators compared pairwise in the scatter-plot matrix.
selected_features = ['2015(E)', '2015(EG)', '2015(CO2)', '2015(GDP annual growth)', '2015(GDP per capita annual growth)', '2015(Population annual growth)']
df_selected = df[selected_features]
# One evenly spaced colour value per row, purely for visual separation.
num_samples = len(df_selected)
colors = np.linspace(0, 1, num_samples)
# Build and display the matrix of pairwise scatter plots.
scatter_matrix = px.scatter_matrix(
    df_selected,
    color=colors,
    title='Scatter Plot Matrix of Selected Features (2020)',
    height=2000,
    width=2000,
)
scatter_matrix.show()
# 3D scatter of the three categorical descriptors, coloured by region.
feature1 = df['Country Name']
feature2 = df['Region']
feature3 = df['IncomeGroup']
# Fixed RGB colour per region.
colormap = {
    'South Asia': 'rgb(255, 0, 0)',  # Red
    'North America': 'rgb(0, 0, 255)',  # Blue
    'East Asia & Pacific': 'rgb(0, 255, 0)',  # Green
    'Europe & Central Asia': 'rgb(255, 255, 0)',  # Yellow
    'Sub-Saharan Africa': 'rgb(255, 0, 255)',  # Magenta
    'Latin America & Caribbean': 'rgb(0, 255, 255)',  # Cyan
    'Middle East & North Africa': 'rgb(128, 0, 128)',  # Purple
    # Add more regions and colors as needed
}
# ROBUSTNESS FIX: colormap[region] raised KeyError for any region missing
# from the map (or NaN); fall back to grey instead.
colors = [colormap.get(region, 'rgb(128, 128, 128)') for region in feature2]
# Create 3D scatter plot
fig = go.Figure(data=[go.Scatter3d(
    x=feature1,
    y=feature2,
    z=feature3,
    text=df['Country Name'],
    mode='markers',
    marker=dict(
        size=8,
        color=colors,  # Use the colors list
        opacity=0.8
    )
)])
# Update layout for better visualization
fig.update_layout(
    scene=dict(
        xaxis_title='Country Name',
        yaxis_title='Region',
        zaxis_title='IncomeGroup'
    ),
    title='3D Plot of Your Dataset',
    margin=dict(l=0, r=0, b=0, t=40),
    height=600,
    width=800
)
# Show interactive plot
fig.show()
years = range(1990, 2022)
# Create a DataFrame containing only CO2 emissions data.
co2_columns = [col for col in df.columns if 'CO2' in col]
# FIX: take an explicit copy — df[co2_columns] is a slice of df, and
# assigning a new index on it risks SettingWithCopyWarning / aliasing df.
co2_emissions_df = df[co2_columns].copy()
# Attach a yearly DatetimeIndex starting at the first year.
co2_emissions_df.index = pd.date_range(start=f'{years[0]}-01-01', periods=len(co2_emissions_df), freq='Y')
# NOTE(review): each column holds one year's values across countries, so
# the series being decomposed runs over the country axis relabelled with
# dates — confirm this is the intended time series for STL.
for column in co2_emissions_df.columns:
    decomposition = seasonal_decompose(co2_emissions_df[column], model='additive')
    # One trace per decomposition component.
    traces = [
        go.Scatter(x=co2_emissions_df.index, y=co2_emissions_df[column], mode='lines', name='Original'),
        go.Scatter(x=co2_emissions_df.index, y=decomposition.trend, mode='lines', name='Trend'),
        go.Scatter(x=co2_emissions_df.index, y=decomposition.seasonal, mode='lines', name='Seasonal'),
        go.Scatter(x=co2_emissions_df.index, y=decomposition.resid, mode='lines', name='Residual')
    ]
    # Create layout for the plot
    layout = dict(title=f'STL Decomposition of {column}', xaxis=dict(title='Year'), yaxis=dict(title='CO2 Emissions'))
    # Create figure and plot
    fig = go.Figure(data=traces, layout=layout)
    fig.show()
# Keep only the CO2 emission columns.
co2_columns = [col for col in df.columns if 'CO2' in col]
df_co2 = df[co2_columns]
# Summing down the rows gives one total per year-column
# (index = column label like "1995(CO2)", value = world total).
df_co2_yearly = df_co2.sum(axis=0)
# Line plot of the yearly totals.
fig = go.Figure()
trace = go.Scatter(
    x=df_co2_yearly.index,
    y=df_co2_yearly.values,
    mode='lines+markers',
    name='CO2 Emissions',
)
fig.add_trace(trace)
fig.update_layout(
    title='CO2 Emissions Over the Years',
    xaxis_title='Year',
    yaxis_title='CO2 Emissions',
    template='ggplot2',  # ggplot2-style theme for the plot
)
fig.show()
# Columns holding the 'GDP per capita' series, one column per year.
gdp_per_capita_columns = [col for col in df.columns if 'GDP per capita' in col]
# Transpose so the x-axis is time (years), one line per country; plotting the
# raw frame put the DataFrame row index (countries) on the x-axis, which does
# not match the 'Over Time' title.
gdp_per_capita_df = df[gdp_per_capita_columns].T
gdp_per_capita_df.index = [int(col[:4]) for col in gdp_per_capita_columns]  # labels start with the 4-digit year
gdp_per_capita_df.columns = df['Country Name']
# Plot GDP per capita over time using Plotly Express.
fig = px.line(gdp_per_capita_df, title='GDP per Capita Over Time')
fig.update_layout(xaxis_title='Year', yaxis_title='GDP per Capita')
fig.show()
# Reshape to long format: one row per (country, year) with its CO2 value.
co2_columns = [col for col in df.columns if 'CO2' in col]
co2_df = df[['Country Code', 'Country Name'] + co2_columns]
co2_df = co2_df.melt(id_vars=['Country Code', 'Country Name'], var_name='Year', value_name='CO2 emissions')
# Pull the numeric year out of labels like '1990(CO2)'; the raw string avoids
# the invalid '\d' escape-sequence warning of the original literal.
co2_df['Year'] = co2_df['Year'].str.extract(r'(\d+)').astype(int)
# Animated world map of CO2 emissions, one frame per year.
fig = px.choropleth(co2_df,
                    locations='Country Code',     # ISO-3 codes drive the map shapes
                    color='CO2 emissions',
                    hover_name='Country Name',
                    animation_frame='Year',
                    color_continuous_scale='Viridis',  # You can change the color scale
                    title='CO2 Emissions Worldwide')
fig.update_layout(coloraxis_colorbar=dict(title='CO2 Emissions (metric tons)'))
fig.show()
# Features for the 2015 cross-country cluster analysis.
features = [
    '2015(E)',
    '2015(EG)',
    '2015(CO2)',
    '2015(GDP annual growth)',
    '2015(GDP per capita annual growth)',
    '2015(Population annual growth)',
]
# PCA, t-SNE and KMeans all reject NaN, so keep only complete rows
# (StandardScaler alone would tolerate NaN, the downstream estimators do not).
feature_data = df[features].dropna()
# Standardize the features.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(feature_data)
# PCA for linear dimensionality reduction to 2D.
pca = PCA(n_components=2)
pca_result = pca.fit_transform(scaled_features)
# t-SNE for non-linear dimensionality reduction to 2D.
tsne = TSNE(n_components=2, random_state=42)
tsne_result = tsne.fit_transform(scaled_features)
# KMeans clustering; fit_predict combines the fit and predict passes.
kmeans = KMeans(n_clusters=3, random_state=42)
clusters = kmeans.fit_predict(scaled_features)
# Assemble the 2D embeddings and cluster labels for plotting; string labels
# make plotly use a discrete legend instead of a continuous colorbar.
visualization_df = pd.DataFrame({
    'PCA1': pca_result[:, 0],
    'PCA2': pca_result[:, 1],
    't-SNE1': tsne_result[:, 0],
    't-SNE2': tsne_result[:, 1],
    'Cluster': clusters.astype(str)
}, index=feature_data.index)
# Interactive scatter of the PCA embedding colored by cluster.
fig = px.scatter(visualization_df, x='PCA1', y='PCA2', color='Cluster',
                 hover_data=[visualization_df.index], title='PCA Cluster Analysis')
fig.update_traces(marker=dict(size=8))
fig.show()
# Interactive scatter of the t-SNE embedding colored by cluster.
fig = px.scatter(visualization_df, x='t-SNE1', y='t-SNE2', color='Cluster',
                 hover_data=[visualization_df.index], title='t-SNE Cluster Analysis')
fig.update_traces(marker=dict(size=8))
fig.show()
# Correlation heatmap over the numeric part of the dataset.
df_numeric = df.select_dtypes(include=['float64', 'int64'])
# Mean-impute missing values before correlating.
df_numeric = df_numeric.fillna(df_numeric.mean())
corr_matrix_numeric = df_numeric.corr()
# Build the heatmap trace separately, then wrap it in a figure.
heatmap_trace = go.Heatmap(
    z=corr_matrix_numeric.values,
    x=corr_matrix_numeric.columns,
    y=corr_matrix_numeric.columns,
    colorscale='RdBu',
    zmin=-1,   # pin the scale to the full correlation range
    zmax=1,
    colorbar=dict(
        title='Correlation',
        tickvals=[-1, -0.5, 0, 0.5, 1],
        ticktext=['-1', '-0.5', '0', '0.5', '1'],
        ticks='outside',
        dtick=0.5,
    ),
)
fig = go.Figure(data=heatmap_trace)
# Square layout so the matrix cells stay square.
fig.update_layout(
    title='Correlation Matrix',
    xaxis_title='Features',
    yaxis_title='Features',
    width=1000,
    height=1000,
)
fig.show()
# Sanity check: both axes of the correlation matrix must carry exactly
# the numeric DataFrame's column labels.
columns_match = corr_matrix_numeric.columns.equals(df_numeric.columns)
index_match = corr_matrix_numeric.index.equals(df_numeric.columns)
print("\nColumns Match:", columns_match)
print("Index Match:", index_match)
Columns Match: True Index Match: True
# Peek at the correlation matrix's column labels (pandas truncates the middle).
print("\nColumns of Correlation Matrix:")
print(corr_matrix_numeric.columns)
Columns of Correlation Matrix:
Index(['1990(E)', '1991(E)', '1992(E)', '1993(E)', '1994(E)', '1995(E)',
'1996(E)', '1997(E)', '1998(E)', '1999(E)',
...
'2015(Population annual growth)', '2016(Population annual growth)',
'2017(Population annual growth)', '2018(Population annual growth)',
'2019(Population annual growth)', '2020(Population annual growth)',
'2021(Population annual growth)', '2022(Population annual growth)',
'latest_gdp_per_capita', 'latest_co2'],
dtype='object', length=307)
# Peek at the correlation matrix's row labels (should mirror the columns).
print("\nIndex of Correlation Matrix:")
print(corr_matrix_numeric.index)
Index of Correlation Matrix:
Index(['1990(E)', '1991(E)', '1992(E)', '1993(E)', '1994(E)', '1995(E)',
'1996(E)', '1997(E)', '1998(E)', '1999(E)',
...
'2015(Population annual growth)', '2016(Population annual growth)',
'2017(Population annual growth)', '2018(Population annual growth)',
'2019(Population annual growth)', '2020(Population annual growth)',
'2021(Population annual growth)', '2022(Population annual growth)',
'latest_gdp_per_capita', 'latest_co2'],
dtype='object', length=307)
# Interactive boxplot of the '2015(E)' feature using Plotly Graph Objects.
fig = go.Figure()
feature = '2015(E)'
fig.add_trace(go.Box(
    y=df[feature],
    name=feature,
    boxmean=True,          # overlay the mean inside the box
    boxpoints='outliers',  # draw the individual outlier points
    jitter=0.3,            # spread points horizontally for visibility
    marker=dict(
        color='rgb(107,174,214)',                 # custom marker color
        outliercolor='rgba(219, 64, 82, 0.6)',    # color for outliers
        line=dict(
            outliercolor='rgba(219, 64, 82, 0.6)',
            outlierwidth=2
        )
    ),
    hoverinfo='y'
))
# Customize layout; the title names the actual feature (the previous
# 'Boxplot of Selected Features' was misleading for a single-feature plot).
fig.update_layout(
    title=f'Boxplot of {feature}',
    xaxis_title='Feature',
    yaxis_title='Value',
    showlegend=False,
    height=1000,
    width=1000,
)
# hovertemplate takes precedence over hoverinfo and formats to two decimals;
# the redundant second boxmean=True was dropped (already set on the trace).
fig.update_traces(
    hovertemplate='%{y:.2f}',
    hoverlabel=dict(bgcolor='white', font_size=12)
)
fig.show()
feature = '2015(E)'
# IQR rule: values beyond 1.5 * IQR from the quartiles count as outliers.
q1, q3 = df[feature].quantile([0.25, 0.75])
iqr = q3 - q1
lower_bound = q1 - 1.5 * iqr
upper_bound = q3 + 1.5 * iqr
outlier_mask = (df[feature] < lower_bound) | (df[feature] > upper_bound)
outliers = df.loc[outlier_mask, feature]
# Box plot with the flagged points overlaid in red.
fig = px.box(df, y=feature, title=f'Boxplot of {feature} with Outliers Highlighted', labels={'value': 'Value', 'variable': 'Feature'})
fig.update_traces(marker=dict(color='red'), boxmean=True)  # red markers, mean shown in the box
fig.add_scatter(x=outliers.index, y=outliers, mode='markers', marker=dict(color='red'), name='Outliers')
fig.show()
# Report how many rows fell outside the IQR fences.
print(f"Number of outliers for {feature}: {outliers.shape[0]}")
Number of outliers for 2015(E): 23
# Interactive boxplot of the '2015(EG)' feature using Plotly Graph Objects.
fig = go.Figure()
feature = '2015(EG)'
fig.add_trace(go.Box(
    y=df[feature],
    name=feature,
    boxmean=True,          # overlay the mean inside the box
    boxpoints='outliers',  # draw the individual outlier points
    jitter=0.3,            # spread points horizontally for visibility
    marker=dict(
        color='rgb(107,174,214)',                 # custom marker color
        outliercolor='rgba(219, 64, 82, 0.6)',    # color for outliers
        line=dict(
            outliercolor='rgba(219, 64, 82, 0.6)',
            outlierwidth=2
        )
    ),
    hoverinfo='y'
))
# Customize layout; the title names the actual feature (the previous
# 'Boxplot of Selected Features' was misleading for a single-feature plot).
fig.update_layout(
    title=f'Boxplot of {feature}',
    xaxis_title='Feature',
    yaxis_title='Value',
    showlegend=False,
    height=1500,
    width=1000,
)
# hovertemplate takes precedence over hoverinfo and formats to two decimals;
# the redundant second boxmean=True was dropped (already set on the trace).
fig.update_traces(
    hovertemplate='%{y:.2f}',
    hoverlabel=dict(bgcolor='white', font_size=12)
)
fig.show()
# Select the feature of interest.
feature = '2015(EG)'
# IQR rule: values beyond 1.5 * IQR from the quartiles count as outliers.
q1, q3 = df[feature].quantile([0.25, 0.75])
iqr = q3 - q1
lower_bound = q1 - 1.5 * iqr
upper_bound = q3 + 1.5 * iqr
outlier_mask = (df[feature] < lower_bound) | (df[feature] > upper_bound)
outliers = df.loc[outlier_mask, feature]
# Box plot with the flagged points overlaid in red.
fig = px.box(df, y=feature, title=f'Boxplot of {feature} with Outliers Highlighted', labels={'value': 'Value', 'variable': 'Feature'})
fig.update_traces(marker=dict(color='red'), boxmean=True)  # red markers, mean shown in the box
fig.add_scatter(x=outliers.index, y=outliers, mode='markers', marker=dict(color='red'), name='Outliers')
fig.show()
# Report how many rows fell outside the IQR fences.
print(f"Number of outliers for {feature}: {outliers.shape[0]}")
Number of outliers for 2015(EG): 34
# Interactive boxplot of the '2015(CO2)' feature using Plotly Graph Objects.
fig = go.Figure()
feature = '2015(CO2)'
fig.add_trace(go.Box(
    y=df[feature],
    name=feature,
    boxmean=True,          # overlay the mean inside the box
    boxpoints='outliers',  # draw the individual outlier points
    jitter=0.3,            # spread points horizontally for visibility
    marker=dict(
        color='rgb(107,174,214)',                 # custom marker color
        outliercolor='rgba(219, 64, 82, 0.6)',    # color for outliers
        line=dict(
            outliercolor='rgba(219, 64, 82, 0.6)',
            outlierwidth=2
        )
    ),
    hoverinfo='y'
))
# Customize layout; the title names the actual feature (the previous
# 'Boxplot of Selected Features' was misleading for a single-feature plot).
fig.update_layout(
    title=f'Boxplot of {feature}',
    xaxis_title='Feature',
    yaxis_title='Value',
    showlegend=False,
    height=1000,
    width=1000,
)
# hovertemplate takes precedence over hoverinfo and formats to two decimals;
# the redundant second boxmean=True was dropped (already set on the trace).
fig.update_traces(
    hovertemplate='%{y:.2f}',
    hoverlabel=dict(bgcolor='white', font_size=12)
)
fig.show()
feature = '2015(CO2)'
# IQR rule: values beyond 1.5 * IQR from the quartiles count as outliers.
q1, q3 = df[feature].quantile([0.25, 0.75])
iqr = q3 - q1
lower_bound = q1 - 1.5 * iqr
upper_bound = q3 + 1.5 * iqr
outlier_mask = (df[feature] < lower_bound) | (df[feature] > upper_bound)
outliers = df.loc[outlier_mask, feature]
# Box plot with the flagged points overlaid in red.
fig = px.box(df, y=feature, title=f'Boxplot of {feature} with Outliers Highlighted', labels={'value': 'Value', 'variable': 'Feature'})
fig.update_traces(marker=dict(color='red'), boxmean=True)  # red markers, mean shown in the box
fig.add_scatter(x=outliers.index, y=outliers, mode='markers', marker=dict(color='red'), name='Outliers')
fig.show()
# Report how many rows fell outside the IQR fences.
print(f"Number of outliers for {feature}: {outliers.shape[0]}")
Number of outliers for 2015(CO2): 34
# Growth-rate features compared side by side in one grouped boxplot.
best_features_for_boxplot = [
    '2015(GDP annual growth)',
    '2015(GDP per capita annual growth)',
    '2015(Population annual growth)'
]
fig = go.Figure()
# One box trace per feature; build the trace first, then attach it.
for feature in best_features_for_boxplot:
    box_trace = go.Box(
        y=df[feature],
        name=feature,
        boxmean=True,          # overlay the mean inside the box
        boxpoints='outliers',  # draw the individual outlier points
        jitter=0.3,            # spread points horizontally for visibility
        marker=dict(
            color='rgb(107,174,214)',
            outliercolor='rgba(219, 64, 82, 0.6)',
            line=dict(outliercolor='rgba(219, 64, 82, 0.6)', outlierwidth=2),
        ),
        hoverinfo='y+name',    # show value and feature name on hover
    )
    fig.add_trace(box_trace)
# Square layout with the traces grouped along the x-axis.
fig.update_layout(
    title='Boxplot of Selected Features',
    xaxis_title='Feature',
    yaxis_title='Value',
    boxmode='group',
    showlegend=False,
    height=1000,
    width=1000
)
# Two-decimal hover formatting applied to every trace.
fig.update_traces(
    hovertemplate='%{y:.2f}',
    hoverlabel=dict(bgcolor='white', font_size=12),
    boxmean=True
)
fig.show()
feature = '2015(GDP annual growth)'
# IQR rule: values beyond 1.5 * IQR from the quartiles count as outliers.
q1, q3 = df[feature].quantile([0.25, 0.75])
iqr = q3 - q1
lower_bound = q1 - 1.5 * iqr
upper_bound = q3 + 1.5 * iqr
outlier_mask = (df[feature] < lower_bound) | (df[feature] > upper_bound)
outliers = df.loc[outlier_mask, feature]
# Box plot with the flagged points overlaid in red.
fig = px.box(df, y=feature, title=f'Boxplot of {feature} with Outliers Highlighted', labels={'value': 'Value', 'variable': 'Feature'})
fig.update_traces(marker=dict(color='red'), boxmean=True)  # red markers, mean shown in the box
fig.add_scatter(x=outliers.index, y=outliers, mode='markers', marker=dict(color='red'), name='Outliers')
fig.show()
# Report how many rows fell outside the IQR fences.
print(f"Number of outliers for {feature}: {outliers.shape[0]}")
Number of outliers for 2015(GDP annual growth): 14
feature = '2015(GDP per capita annual growth)'
# IQR rule: values beyond 1.5 * IQR from the quartiles count as outliers.
q1, q3 = df[feature].quantile([0.25, 0.75])
iqr = q3 - q1
lower_bound = q1 - 1.5 * iqr
upper_bound = q3 + 1.5 * iqr
outlier_mask = (df[feature] < lower_bound) | (df[feature] > upper_bound)
outliers = df.loc[outlier_mask, feature]
# Box plot with the flagged points overlaid in red.
fig = px.box(df, y=feature, title=f'Boxplot of {feature} with Outliers Highlighted', labels={'value': 'Value', 'variable': 'Feature'})
fig.update_traces(marker=dict(color='red'), boxmean=True)  # red markers, mean shown in the box
fig.add_scatter(x=outliers.index, y=outliers, mode='markers', marker=dict(color='red'), name='Outliers')
fig.show()
# Report how many rows fell outside the IQR fences.
print(f"Number of outliers for {feature}: {outliers.shape[0]}")
Number of outliers for 2015(GDP per capita annual growth): 12
feature = '2015(Population annual growth)'
# IQR rule: values beyond 1.5 * IQR from the quartiles count as outliers.
q1, q3 = df[feature].quantile([0.25, 0.75])
iqr = q3 - q1
lower_bound = q1 - 1.5 * iqr
upper_bound = q3 + 1.5 * iqr
outlier_mask = (df[feature] < lower_bound) | (df[feature] > upper_bound)
outliers = df.loc[outlier_mask, feature]
# Box plot with the flagged points overlaid in red.
fig = px.box(df, y=feature, title=f'Boxplot of {feature} with Outliers Highlighted', labels={'value': 'Value', 'variable': 'Feature'})
fig.update_traces(marker=dict(color='red'), boxmean=True)  # red markers, mean shown in the box
fig.add_scatter(x=outliers.index, y=outliers, mode='markers', marker=dict(color='red'), name='Outliers')
fig.show()
# Report how many rows fell outside the IQR fences.
print(f"Number of outliers for {feature}: {outliers.shape[0]}")
Number of outliers for 2015(Population annual growth): 3
# Define features for outlier removal by year, generated instead of the
# previous 66-line hand-written literal (same contents, same insertion
# order: 1990-2022 first, then 1960, then 1961-1989 — order matters because
# the outlier-removal loop below iterates this dict).
features_by_year = {}
for y in range(1990, 2023):
    # 1990-2015 have all six metrics; EG stops after 2015, CO2 after 2020,
    # and E after 2021 (mirrors the columns actually present in the dataset).
    metrics = ['E', 'EG', 'CO2', 'GDP annual growth',
               'GDP per capita annual growth', 'Population annual growth']
    if y >= 2016:
        metrics.remove('EG')
    if y >= 2021:
        metrics.remove('CO2')
    if y >= 2022:
        metrics.remove('E')
    features_by_year[str(y)] = [f'{y}({m})' for m in metrics]
# 1960 has only the EG column.
features_by_year['1960'] = ['1960(EG)']
# 1961-1989 have EG plus the three growth metrics.
for y in range(1961, 1990):
    features_by_year[str(y)] = [
        f'{y}({m})' for m in ('EG', 'GDP annual growth',
                              'GDP per capita annual growth',
                              'Population annual growth')
    ]
# Keep an untouched copy of the original DataFrame for reference.
df_original = df.copy()
# Remove outliers with the IQR rule, column by column.
# NOTE(review): two fixes vs the original pass —
#   1. fences are computed on df_original, not on the progressively filtered
#      frame, so the result no longer depends on column iteration order;
#   2. rows with NaN in a column are kept (NaN comparisons evaluate False, so
#      the original silently dropped every row with any missing value —
#      shrinking the dataset to 3 rows, as the earlier run showed).
for year, features in features_by_year.items():
    for col in features:
        q1 = df_original[col].quantile(0.25)
        q3 = df_original[col].quantile(0.75)
        iqr = q3 - q1
        # Keep rows whose value is missing or inside [q1-1.5*IQR, q3+1.5*IQR].
        within = df[col].isna() | df[col].between(q1 - 1.5 * iqr, q3 + 1.5 * iqr)
        df = df[within]
# Reset index after filtering
df_filtered = df.reset_index(drop=True)
# Print informative message about outlier removal
print("After outlier removal, df has {} samples.".format(df_filtered.shape[0]))
After outlier removal, df has 3 samples.
df_filtered
| Country Name | Country Code | Region | IncomeGroup | 1990(E) | 1991(E) | 1992(E) | 1993(E) | 1994(E) | 1995(E) | ... | 2015(Population annual growth) | 2016(Population annual growth) | 2017(Population annual growth) | 2018(Population annual growth) | 2019(Population annual growth) | 2020(Population annual growth) | 2021(Population annual growth) | 2022(Population annual growth) | latest_gdp_per_capita | latest_co2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | American Samoa | ASM | East Asia & Pacific | High income | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.0 | ... | -1.639270 | -1.807231 | -1.971819 | -2.122936 | -2.304139 | -2.421250 | -2.530171 | -1.706495 | 3.486 | 0.0 |
| 1 | Guam | GUM | East Asia & Pacific | High income | 99.844307 | 99.875816 | 99.906868 | 99.937256 | 99.96508 | 100.0 | ... | 0.259298 | 0.218837 | 0.154325 | 0.042694 | -0.032019 | 0.359326 | 0.767004 | 0.724497 | 0.000 | 0.0 |
| 2 | Timor-Leste | TLS | East Asia & Pacific | Lower middle income | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.0 | ... | 1.755472 | 1.542920 | 1.513362 | 1.485808 | 1.462727 | 1.515821 | 1.598470 | 1.529119 | -21.747 | 446.1 |
3 rows × 311 columns
df_original
| Country Name | Country Code | Region | IncomeGroup | 1990(E) | 1991(E) | 1992(E) | 1993(E) | 1994(E) | 1995(E) | ... | 2015(Population annual growth) | 2016(Population annual growth) | 2017(Population annual growth) | 2018(Population annual growth) | 2019(Population annual growth) | 2020(Population annual growth) | 2021(Population annual growth) | 2022(Population annual growth) | latest_gdp_per_capita | latest_co2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Aruba | ABW | Latin America & Caribbean | High income | 100.0 | 99.153656 | 99.197128 | 99.239914 | 100.000000 | 100.000000 | ... | 0.637959 | 0.590062 | 0.537296 | 0.494795 | 0.451970 | 0.134255 | -0.045045 | -0.086392 | 10.554 | 0.00 |
| 1 | Afghanistan | AFG | South Asia | Low income | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 3.121341 | 2.581549 | 2.866492 | 2.885208 | 2.908529 | 3.134747 | 2.851358 | 2.534498 | 0.000 | 8709.47 |
| 2 | Angola | AGO | Sub-Saharan Africa | Lower middle income | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 3.617678 | 3.586211 | 3.550987 | 3.464457 | 3.395278 | 3.268348 | 3.166030 | 3.096753 | -0.097 | 19814.50 |
| 3 | Albania | ALB | Europe & Central Asia | Upper middle income | 100.0 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | ... | -0.291206 | -0.159880 | -0.091972 | -0.246732 | -0.426007 | -0.574207 | -0.926918 | -1.215790 | 6.139 | 4383.20 |
| 4 | Andorra | AND | Europe & Central Asia | High income | 100.0 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | ... | 0.174378 | 1.100603 | 1.772183 | 1.580147 | 1.757491 | 1.761891 | 1.702288 | 0.994607 | 7.733 | 448.88 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 212 | Kosovo | XKX | Europe & Central Asia | Upper middle income | 100.0 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | ... | -1.364932 | -0.596734 | 0.753585 | 0.339011 | -0.457730 | 0.070131 | -0.229016 | -1.355874 | 6.655 | 0.00 |
| 213 | Yemen, Rep. | YEM | Middle East & North Africa | Low income | 0.0 | 0.000000 | 44.100000 | 40.774715 | 41.985134 | 43.193512 | ... | 2.712955 | 2.621537 | 2.564321 | 2.486360 | 2.426208 | 2.310447 | 2.137790 | 2.144628 | 0.000 | 9960.10 |
| 214 | South Africa | ZAF | Sub-Saharan Africa | Upper middle income | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 2.074017 | 0.972004 | 0.387278 | 1.225530 | 1.295074 | 1.223179 | 0.998920 | 0.841058 | 1.057 | 393241.60 |
| 215 | Zambia | ZMB | Sub-Saharan Africa | Lower middle income | 13.9 | 12.752703 | 19.200000 | 14.213207 | 14.941759 | 15.668272 | ... | 3.191896 | 3.147407 | 3.113595 | 3.061888 | 3.007618 | 2.933818 | 2.840806 | 2.758032 | 2.386 | 7607.10 |
| 216 | Zimbabwe | ZWE | Sub-Saharan Africa | Lower middle income | 0.0 | 0.000000 | 28.200000 | 31.163668 | 28.100000 | 31.948580 | ... | 2.136294 | 2.081806 | 2.043620 | 2.020537 | 1.989253 | 2.031112 | 2.045715 | 2.024036 | 4.388 | 8312.50 |
217 rows × 311 columns
# Model inputs: country identifiers plus every per-year indicator column.
# The column names follow the pattern 'YYYY(<indicator>)', so build the list
# from the year ranges instead of spelling out ~300 literals.
_meta_cols = ['Country Name', 'Country Code', 'Region', 'IncomeGroup']
_indicator_years = [
    ('E', range(1990, 2022)),
    ('EG', range(1960, 2016)),
    ('CO2', range(1990, 2021)),
    ('GDP annual growth', range(1961, 2023)),
    ('GDP per capita annual growth', range(1961, 2023)),
    ('Population annual growth', range(1961, 2023)),
]
features = list(_meta_cols)
for _label, _years in _indicator_years:
    features.extend(f'{_year}({_label})' for _year in _years)
features.append('latest_gdp_per_capita')
# Target: most recent total CO2 emissions.
target = 'latest_co2'
# Assemble the design matrix and target from the unfiltered data, one-hot
# encode the categorical columns, hold out 20% for testing, then standardize
# using statistics computed on the training split only (avoids test leakage).
X = pd.get_dummies(df_original[features])
y = df_original[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Hyperparameter grid: candidate L2 penalty strengths.
parameters = {'alpha': [0.1, 1, 10]}
# Cross-validated search over alpha (default 5-fold CV, R^2 scoring).
ridge_grid = GridSearchCV(Ridge(), parameters)
ridge_grid.fit(X_train_scaled, y_train)
# Best alpha value found by the grid search.
best_alpha = ridge_grid.best_params_['alpha']
print(f"Best alpha for Ridge: {best_alpha}")
# GridSearchCV already refits the winning model on the full training set
# (refit=True is the default), so reuse it rather than instantiating and
# fitting a duplicate Ridge with the same alpha from scratch.
ridge = ridge_grid.best_estimator_
# Predict on the held-out test data.
y_pred_ridge = ridge.predict(X_test_scaled)
# Evaluation metrics on the test set.
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
mae_ridge = mean_absolute_error(y_test, y_pred_ridge)
rmse_ridge = np.sqrt(mse_ridge)
r2_ridge = r2_score(y_test, y_pred_ridge)
# Print the evaluation metrics
print("Ridge Regression:")
print(f"- Mean Squared Error (Ridge): {mse_ridge}")
print(f"- Mean Absolute Error (Ridge): {mae_ridge}")
print(f"- Root Mean Squared Error (Ridge): {rmse_ridge}")
print(f"- R-squared (Ridge): {r2_ridge}")
Best alpha for Ridge: 0.1 Ridge Regression: - Mean Squared Error (Ridge): 15576340795.818892 - Mean Absolute Error (Ridge): 81629.23186175199 - Root Mean Squared Error (Ridge): 124805.21141290091 - R-squared (Ridge): 0.43448503949473927
# --- Diagnostic plots for the tuned Ridge model ---
# Actual vs. predicted scatter.
pred_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred_ridge})
fig = px.scatter(pred_df, x='Actual', y='Predicted', title='Actual vs. Predicted Values (Ridge Regression)')
fig.update_layout(xaxis_title='Actual', yaxis_title='Predicted')
fig.show()

# Residuals (actual minus predicted) against the actual values.
ridge_residuals = y_test - y_pred_ridge
fig = px.scatter(x=y_test, y=ridge_residuals, title='Residual Plot (Ridge Regression)')
fig.update_layout(xaxis_title='Actual', yaxis_title='Residuals', yaxis=dict(title='Residuals', zeroline=False))
fig.show()

# Distribution of the residuals.
fig = px.histogram(x=ridge_residuals, nbins=50, title='Distribution of Residuals (Ridge Regression)')
fig.update_layout(
    xaxis_title='Residuals',
    yaxis_title='Frequency',
    margin=dict(t=50, b=50, l=50, r=50),    # breathing room around the plot
    paper_bgcolor='rgba(255,255,255,0.9)',  # near-white canvas
    plot_bgcolor='rgba(240,240,240,0.9)',   # light grey plotting area
    bargap=0.1,                             # small gap between histogram bars
)
fig.show()
# Baseline: ordinary least squares on the standardized features.
linear_regression = LinearRegression()
# Train the model
linear_regression.fit(X_train_scaled, y_train)
# Evaluate on the held-out test set.
y_pred = linear_regression.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
# FIX: mean_squared_error(..., squared=False) is deprecated and removed in
# newer scikit-learn; take the square root explicitly, matching the Ridge
# cell above (rmse_ridge = np.sqrt(mse_ridge)).
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print("Linear Regression:")
print(f"- MSE: {mse}")
print(f"- MAE: {mae}")
print(f"- RMSE: {rmse}")
print(f"- R2: {r2}")
Linear Regression: - MSE: 15319379246.472343 - MAE: 80348.82994515097 - RMSE: 123771.47993973548 - R2: 0.44381429097523406
# Predicted vs. actual for the linear model, with a dashed y=x reference line.
scatter_layout = dict(
    title='Predicted vs Actual Plot',
    xaxis_title='Actual Values',
    yaxis_title='Predicted Values',
    showlegend=True,
    height=500,
    width=800,
)
points = go.Scatter(x=y_test, y=y_pred, mode='markers', marker=dict(color='blue'), name='Predicted vs Actual')
identity = go.Scatter(x=y_test, y=y_test, mode='lines', line=dict(color='black', width=2, dash='dash'), name='y=x')
fig = go.Figure(data=[points, identity])
fig.update_layout(**scatter_layout)
# Show plot
fig.show()
# Residuals of the linear model plotted against its predictions.
residuals = y_test - y_pred
res_trace = go.Scatter(x=y_pred, y=residuals, mode='markers', marker=dict(color='blue'), name='Residuals')
fig = go.Figure(data=[res_trace])
# Dashed red zero line: points on it were predicted perfectly.
fig.add_shape(type="line", x0=min(y_pred), y0=0, x1=max(y_pred), y1=0, line=dict(color="red", width=2, dash="dash"), name='Zero Residuals')
fig.update_layout(
    title='Residual Plot',
    xaxis_title='Predicted Values',
    yaxis_title='Residuals',
    showlegend=True,
    height=500,
    width=800,
)
# Show plot
fig.show()
# Horizontal bar chart of the linear model's coefficients. Features were
# standardized, so coefficient magnitudes are comparable across features.
feature_importance = linear_regression.coef_
coef_bars = go.Bar(x=feature_importance, y=X_train.columns, orientation='h', marker=dict(color='blue'), name='Feature Importance')
fig = go.Figure(data=[coef_bars])
fig.update_layout(
    title='Feature Importance Plot',
    xaxis_title='Feature Importance',
    yaxis_title='Feature',
    showlegend=False,
    height=2000,
    width=1000,
)
# Show plot
fig.show()
# Tune a decision-tree regressor via exhaustive grid search.
decision_tree = DecisionTreeRegressor(random_state=42)
# Hyperparameter grid: tree depth and the split/leaf-size thresholds that
# control overfitting.
param_grid = {
    'max_depth': [None, 5, 10, 15],  # Maximum depth of the tree
    'min_samples_split': [2, 5, 10],  # Minimum number of samples required to split an internal node
    'min_samples_leaf': [1, 2, 4],  # Minimum number of samples required to be at a leaf node
}
# 5-fold cross-validation, selecting by (negated) MSE.
grid_search = GridSearchCV(estimator=decision_tree, param_grid=param_grid,
                           cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train_scaled, y_train)
# Get the best parameters
best_params = grid_search.best_params_
# GridSearchCV already refits the winning configuration on the whole training
# set (refit=True is the default) — reuse it rather than training a duplicate.
best_decision_tree = grid_search.best_estimator_
# Evaluate on the held-out test set.
y_pred = best_decision_tree.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
# FIX: squared=False is deprecated/removed in newer scikit-learn; compute
# RMSE explicitly, consistent with the Ridge cell above.
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print("Decision Tree with Tuned Hyperparameters:")
print(f"- Best Parameters: {best_params}")
print(f"- MSE: {mse}")
print(f"- MAE: {mae}")
print(f"- RMSE: {rmse}")
print(f"- R2: {r2}")
Decision Tree with Tuned Hyperparameters:
- Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5}
- MSE: 18647424274.05721
- MAE: 27348.99365530303
- RMSE: 136555.5721091498
- R2: 0.3229862173599225
def plot_prediction_vs_actual_decision_tree_interactive(model, X, y):
    """Interactive scatter of model predictions for X against true targets y.

    A dashed y = x line marks where a perfect prediction would fall.
    """
    predicted = model.predict(X)
    points = go.Scatter(x=y, y=predicted, mode='markers',
                        marker=dict(color='blue', size=8, opacity=0.5),
                        name='Predictions')
    span = [y.min(), y.max()]
    ideal = go.Scatter(x=span, y=span, mode='lines',
                       line=dict(color='black', width=2, dash='dash'),
                       name='Perfect Prediction')
    fig = go.Figure(data=[points, ideal])
    fig.update_layout(title='Decision Tree Prediction vs. Actual Plot',
                      xaxis_title='Actual',
                      yaxis_title='Predicted',
                      hovermode='closest',
                      showlegend=True)
    fig.show()

# Visualize test-set performance of the tuned decision tree.
plot_prediction_vs_actual_decision_tree_interactive(best_decision_tree, X_test_scaled, y_test)
# Render the tuned regression tree (top 3 levels only, for legibility).
plt.figure(figsize=(16, 10))  # Adjust the figure size
# FIX: the previous call passed class_names=["Low", "Medium", "High"], but
# class names apply only to classifiers and are ignored for a
# DecisionTreeRegressor — the misleading argument is dropped.
plot_tree(best_decision_tree, feature_names=X_train.columns.tolist(), filled=True,
          fontsize=10, precision=2, rounded=True, impurity=False,
          max_depth=3)
plt.title("Decision Tree Visualization", fontsize=20)  # Set the title and adjust font size
plt.tight_layout()  # Improve spacing
plt.savefig("decision_tree_visualization.png")  # Save the plot as an image
plt.show()
# Bar chart of impurity-based feature importances from the tuned tree.
feature_importance_dt = best_decision_tree.feature_importances_
importance_bar = go.Bar(x=X_train.columns, y=feature_importance_dt, marker_color='green')
fig = go.Figure(data=[importance_bar])
fig.update_layout(title='Decision Tree Feature Importance Plot',
                  xaxis_title='Feature',
                  yaxis_title='Feature Importance',
                  width=2000, height=1200)
fig.show()
# Tune a random-forest regressor via exhaustive grid search.
random_forest = RandomForestRegressor(random_state=42)
# Hyperparameter grid: ensemble size, tree depth, and the split/leaf-size
# thresholds that control overfitting.
param_grid = {
    'n_estimators': [100, 200, 300],  # Number of trees in the forest
    'max_depth': [None, 5, 10, 20],  # Maximum depth of the tree
    'min_samples_split': [2, 5, 10],  # Minimum number of samples required to split an internal node
    'min_samples_leaf': [1, 2, 4],  # Minimum number of samples required to be at a leaf node
}
# 5-fold cross-validation, selecting by (negated) MSE.
grid_search = GridSearchCV(estimator=random_forest, param_grid=param_grid,
                           cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train_scaled, y_train)
# Get the best parameters
best_params = grid_search.best_params_
# GridSearchCV already refits the winning configuration on the whole training
# set (refit=True is the default) — reuse it rather than training a duplicate.
best_random_forest = grid_search.best_estimator_
# Evaluate on the held-out test set.
y_pred = best_random_forest.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
# FIX: squared=False is deprecated/removed in newer scikit-learn; compute
# RMSE explicitly, consistent with the Ridge cell above.
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print("Random Forest with Tuned Hyperparameters:")
print(f"- Best Parameters: {best_params}")
print(f"- MSE: {mse}")
print(f"- MAE: {mae}")
print(f"- RMSE: {rmse}")
print(f"- R2: {r2}")
Random Forest with Tuned Hyperparameters:
- Best Parameters: {'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
- MSE: 9746868884.942074
- MAE: 26766.682104552576
- RMSE: 98726.2320001228
- R2: 0.6461299707825127
def plot_decision_tree(model, feature_names):
    """Draw the first tree of a fitted forest ensemble (top 5 levels only).

    feature_names is expected to be a pandas Index (e.g. X_train.columns);
    it is converted to a plain list for plot_tree.
    """
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(20, 10))
    first_tree = model.estimators_[0]
    plot_tree(first_tree, feature_names=feature_names.tolist(), filled=True, ax=ax, fontsize=10,
              impurity=False, precision=2, proportion=True, rounded=True, node_ids=True,
              max_depth=5, label='root')
    plt.title("Decision Tree Visualization (Sample Tree)", fontsize=16)
    plt.show()

# Show one representative tree from the tuned random forest.
plot_decision_tree(best_random_forest, X_train.columns)
def plot_mse_vs_trees_interactive(model, X=None):
    """Plot each tree's mean leaf impurity (per-leaf MSE) against its index.

    Parameters
    ----------
    model : fitted RandomForestRegressor
    X : array-like, optional
        Samples to route through each tree. Defaults to X_train_scaled, the
        matrix the forest was actually fitted on. FIX: the previous version
        used the unscaled X_train DataFrame, which fed the trees data on a
        different scale than they were trained on and triggered the repeated
        "X has feature names, but DecisionTreeRegressor was fitted without
        feature names" warnings.
    """
    if X is None:
        X = X_train_scaled
    # For each tree: send X through it, look up the impurity (MSE for a
    # regression tree) of the leaf each sample lands in, and average.
    mse_values = [np.mean(tree.tree_.impurity[tree.apply(X)]) for tree in model.estimators_]
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=list(range(1, len(model.estimators_) + 1)), y=mse_values, mode='lines+markers', name='MSE'))
    fig.update_layout(title='Mean Squared Error (MSE) vs. Number of Trees',
                      xaxis_title='Number of Trees',
                      yaxis_title='Mean Squared Error (MSE)',
                      xaxis=dict(type='linear'),
                      yaxis=dict(type='linear'),
                      showlegend=True,
                      plot_bgcolor='rgba(0,0,0,0)')
    fig.show()

# Per-tree MSE on the (scaled) training data for the tuned random forest.
plot_mse_vs_trees_interactive(best_random_forest)
/Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names 
/Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names 
/Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names 
/Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names 
/Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names 
/Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names 
/Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names 
/Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names 
/Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names /Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names 
/Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names
def plot_feature_importance(model, feature_names):
    """Render an interactive horizontal bar chart of a fitted model's feature importances.

    Parameters
    ----------
    model : fitted estimator exposing ``feature_importances_`` (e.g. a RandomForestRegressor).
    feature_names : list of str — one name per column the model was trained on.
    """
    importances = pd.DataFrame({
        'Feature': feature_names,
        'Importance': model.feature_importances_,
    })
    fig = px.bar(
        importances,
        x='Importance',
        y='Feature',
        orientation='h',
        title='Feature Importance in Random Forest Model',
        labels={'Importance': 'Feature Importance', 'Feature': 'Features'},
        color='Importance',
        color_continuous_scale='inferno',
    )
    # Sort bars by importance and drop the colorbar (redundant with bar length).
    fig.update_layout(yaxis={'categoryorder': 'total ascending'}, coloraxis_showscale=False)
    fig.show()
# Visualize which features drove the tuned random forest's predictions.
# Requires 'best_random_forest' (fitted earlier) and the training DataFrame
# 'X_train' whose column names label the bars.
plot_feature_importance(best_random_forest, X_train.columns.tolist())
# Initialize Gradient Boosting Regressor
gb_regressor = GradientBoostingRegressor(random_state=42)

# Hyperparameter grid for the exhaustive search.
param_grid = {
    'n_estimators': [100, 200, 300],    # number of boosting stages
    'learning_rate': [0.01, 0.1, 0.2],  # shrinkage applied to each tree's contribution
    'max_depth': [3, 5, 10],            # depth of the individual trees
}

# 5-fold cross-validated grid search, minimizing MSE.
grid_search = GridSearchCV(estimator=gb_regressor, param_grid=param_grid,
                           cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train_scaled, y_train)

# Best hyperparameter combination found by the search.
best_params = grid_search.best_params_

# Refit a fresh model with the best hyperparameters on the full training set.
best_gb_regressor = GradientBoostingRegressor(random_state=42, **best_params)
best_gb_regressor.fit(X_train_scaled, y_train)

# Evaluate on the held-out test set.
y_pred = best_gb_regressor.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
# BUG FIX: mae and rmse were previously NOT recomputed in this cell, so the
# prints below reported stale values left over from the random-forest cell
# (which is why the notebook's summary table shows identical MAE/RMSE for
# Random Forest and Gradient Boosting).
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print("Gradient Boosting with Tuned Hyperparameters:")
print(f"- Best Parameters: {best_params}")
print(f"- MAE: {mae}")
print(f"- RMSE: {rmse}")
print(f"- MSE: {mse}")
print(f"- R2: {r2}")
Gradient Boosting with Tuned Hyperparameters:
- Best Parameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100}
- MAE: 26766.682104552576
- RMSE: 98726.2320001228
- MSE: 11475343277.93204
- R2: 0.5833759426767295
def plot_prediction_vs_actual_interactive(y_test, y_pred):
    """Interactive scatter of predictions vs. ground truth with a perfect-prediction diagonal."""
    fig = go.Figure()
    # Each marker is one test sample: x = true value, y = model output.
    scatter = go.Scatter(x=y_test, y=y_pred, mode='markers',
                         marker=dict(color='blue', opacity=0.5),
                         name='Predicted vs Actual')
    # The y = x diagonal — where every point would sit under a perfect model.
    diagonal = go.Scatter(x=y_test, y=y_test, mode='lines',
                          line=dict(color='black', dash='dash'),
                          name='Perfect Prediction')
    fig.add_trace(scatter)
    fig.add_trace(diagonal)
    fig.update_layout(title='Prediction vs. Actual Plot',
                      xaxis_title='Actual',
                      yaxis_title='Predicted',
                      hovermode='closest')
    fig.show()
# y_pred here holds the tuned gradient-boosting predictions computed in the
# previous cell; y_test is the shared held-out target vector.
plot_prediction_vs_actual_interactive(y_test, y_pred)
def plot_residuals_interactive(y_test, y_pred):
    """Interactive residual plot: (actual - predicted) plotted against the predictions."""
    errors = y_test - y_pred
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=y_pred, y=errors, mode='markers',
                             marker=dict(color='blue', opacity=0.5),
                             name='Residuals'))
    # Horizontal zero line — an unbiased model scatters evenly around it.
    fig.add_hline(y=0, line_dash='dash', line_color='red',
                  annotation_text='Zero Residual Line',
                  annotation_position='bottom right')
    fig.update_layout(title='Residual Plot',
                      xaxis_title='Predicted',
                      yaxis_title='Residuals',
                      hovermode='closest')
    fig.show()
# Residual diagnostics for the tuned gradient-boosting model (y_pred is still
# the GB prediction vector from the evaluation cell above).
plot_residuals_interactive(y_test, y_pred)
def plot_learning_curve_interactive(model, X, y):
    """Plot an interactive learning curve (training vs. cross-validation score) with ±1 std bands.

    Scores come from 5-fold CV with neg-MSE scoring, so values are negative
    (closer to zero is better).
    """
    train_sizes, train_scores, test_scores = learning_curve(
        model, X, y, cv=5, scoring='neg_mean_squared_error')
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    fig = go.Figure()
    # BUG FIX: each ±std band is drawn as an upper-edge trace WITHOUT fill
    # followed by a lower-edge trace with fill='tonexty'. Previously
    # fill='tonexty' was also set on the upper-edge traces, which shaded down
    # to the previously added mean line instead of between the band edges,
    # producing spurious filled regions.
    fig.add_trace(go.Scatter(x=train_sizes, y=train_scores_mean + train_scores_std,
                             mode='lines', line=dict(width=0), showlegend=False))
    fig.add_trace(go.Scatter(x=train_sizes, y=train_scores_mean - train_scores_std,
                             mode='lines', line=dict(width=0), fill='tonexty',
                             fillcolor='rgba(255,0,0,0.3)', showlegend=False))
    fig.add_trace(go.Scatter(x=train_sizes, y=test_scores_mean + test_scores_std,
                             mode='lines', line=dict(width=0), showlegend=False))
    fig.add_trace(go.Scatter(x=train_sizes, y=test_scores_mean - test_scores_std,
                             mode='lines', line=dict(width=0), fill='tonexty',
                             fillcolor='rgba(0,255,0,0.3)', showlegend=False))
    # Mean curves added last so they render on top of the shaded bands.
    fig.add_trace(go.Scatter(x=train_sizes, y=train_scores_mean, mode='lines',
                             name='Training Score', line=dict(color='red')))
    fig.add_trace(go.Scatter(x=train_sizes, y=test_scores_mean, mode='lines',
                             name='Cross-validation Score', line=dict(color='green')))
    fig.update_layout(title='Learning Curve',
                      xaxis_title='Training Examples',
                      yaxis_title='Score',
                      hovermode='closest')
    fig.show()
# Learning curve for the tuned gradient-boosting model on the (scaled) training
# data; refits the model once per train size per CV fold, so this is slow.
plot_learning_curve_interactive(best_gb_regressor, X_train_scaled, y_train)
def plot_learning_rate_curve_interactive(X_train, y_train, X_test, y_test):
    """Sweep the gradient-boosting learning rate and plot train/test score per value.

    Fits one GradientBoostingRegressor for each of 10 learning rates evenly
    spaced in [0.01, 1.0], so this can take a while on larger datasets.
    """
    rates = np.linspace(0.01, 1.0, 10)
    train_r2 = []
    test_r2 = []
    for rate in rates:
        booster = GradientBoostingRegressor(learning_rate=rate, random_state=42)
        booster.fit(X_train, y_train)
        # Regressor .score() returns the coefficient of determination (R^2).
        train_r2.append(booster.score(X_train, y_train))
        test_r2.append(booster.score(X_test, y_test))
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=rates, y=train_r2, mode='lines+markers',
                             name='Training Score', line=dict(color='red')))
    fig.add_trace(go.Scatter(x=rates, y=test_r2, mode='lines+markers',
                             name='Test Score', line=dict(color='green')))
    fig.update_layout(title='Learning Rate Curve',
                      xaxis_title='Learning Rate',
                      yaxis_title='Score',
                      hovermode='closest')
    fig.show()
# Sweep the learning rate on the scaled train/test splits prepared earlier.
plot_learning_rate_curve_interactive(X_train_scaled, y_train, X_test_scaled, y_test)
# Initialize MLP Regressor with adjusted parameters
mlp_regressor = MLPRegressor(
    hidden_layer_sizes=(200, 100),  # two hidden layers for extra capacity
    activation='relu',
    alpha=0.0001,                   # L2 regularization strength
    learning_rate_init=0.01,        # initial step size for the optimizer
    max_iter=5000,                  # generous cap; early stopping usually ends sooner
    early_stopping=True,            # hold out part of training data to monitor convergence
    validation_fraction=0.2
)

# fit() returns the estimator itself; it is kept as `history` because the
# training curves (loss_curve_, validation_scores_) are read from it later.
history = mlp_regressor.fit(X_train_scaled, y_train)

# Predict on the held-out test set (scaled to match the training features).
y_pred = mlp_regressor.predict(X_test_scaled)

# Evaluate the model.
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
# FIX: mean_squared_error(..., squared=False) is deprecated (removed in
# scikit-learn 1.6); derive RMSE from the MSE directly instead.
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print("Neural Network with Tuned Hyperparameters:")
print(f"MSE: {mse}")
print(f"MAE: {mae}")
print(f"RMSE: {rmse}")
print(f"R2: {r2}")
Neural Network with Tuned Hyperparameters: MSE: 15942561347.07892 MAE: 90547.34294127848 RMSE: 126263.85605975655 R2: 0.4211890283649865
def plot_prediction_vs_actual_interactive(model, X, y):
    """Scatter a model's predictions against the true targets, with a y=x reference line.

    Note: this redefines the earlier two-argument function of the same name;
    this variant takes the fitted model and computes the predictions itself.
    """
    predictions = model.predict(X)
    comparison = pd.DataFrame({'Actual': y, 'Predicted': predictions})
    fig = go.Figure()
    # One marker per sample.
    fig.add_trace(go.Scatter(x=comparison['Actual'], y=comparison['Predicted'],
                             mode='markers', marker=dict(color='blue'),
                             name='Predicted vs Actual'))
    # Dashed y = x diagonal: a perfect model would sit exactly on this line.
    fig.add_trace(go.Scatter(x=comparison['Actual'], y=comparison['Actual'],
                             mode='lines', line=dict(color='black', width=2, dash='dash'),
                             name='y=x'))
    fig.update_layout(title='Prediction vs. Actual Plot',
                      xaxis_title='Actual Values',
                      yaxis_title='Predicted Values',
                      showlegend=True,
                      height=500,
                      width=800)
    fig.show()
# The MLP was fitted on the scaled feature matrix, so it must also predict on
# scaled features. BUG FIX: the raw X_test DataFrame was passed previously,
# which both triggered the "X has feature names, but MLPRegressor was fitted
# without feature names" UserWarning and fed the network un-scaled inputs it
# was never trained on, invalidating the plotted predictions.
plot_prediction_vs_actual_interactive(mlp_regressor, X_test_scaled, y_test)
/Users/abdullahalrakin/anaconda3/lib/python3.11/site-packages/sklearn/base.py:457: UserWarning: X has feature names, but MLPRegressor was fitted without feature names
def plot_loss_curve_interactive(history):
    """Plot the MLP's per-epoch training loss alongside its validation score.

    Parameters
    ----------
    history : fitted MLPRegressor (``fit`` returns the estimator itself).

    Notes
    -----
    ``loss_curve_`` holds the training loss per epoch, but with
    ``early_stopping=True`` ``validation_scores_`` holds the R^2 score on the
    held-out validation split — NOT a loss. FIX: the second trace and the
    title were previously labelled "Validation Loss", misrepresenting what
    the orange curve shows.
    """
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=list(range(1, len(history.loss_curve_) + 1)),
                             y=history.loss_curve_, mode='lines', name='Training Loss',
                             line=dict(color='blue')))
    fig.add_trace(go.Scatter(x=list(range(1, len(history.validation_scores_) + 1)),
                             y=history.validation_scores_, mode='lines',
                             name='Validation Score (R2)',
                             line=dict(color='orange')))
    fig.update_layout(title='Training Loss vs. Validation Score (R2)',
                      xaxis_title='Epochs',
                      yaxis_title='Value',
                      hovermode='closest')
    fig.show()
# Plot the MLP's convergence curves; `history` is the fitted MLPRegressor
# returned by fit() in the training cell above.
plot_loss_curve_interactive(history)
def plot_learning_curve_interactive(estimator, X, y, ylim=None, cv=None,
                                    n_jobs=None, train_sizes=np.linspace(.1, 1.0, 5)):
    """Interactive learning curve: mean train/CV scores with ±1 std shaded bands.

    Note: this redefines the earlier fixed-scoring function of the same name,
    using a scikit-learn-style signature (`ylim` is accepted but unused).
    """
    sizes, tr_scores, cv_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)
    tr_mean = np.mean(tr_scores, axis=1)
    tr_std = np.std(tr_scores, axis=1)
    cv_mean = np.mean(cv_scores, axis=1)
    cv_std = np.std(cv_scores, axis=1)
    fig = go.Figure()
    # Training band: lower edge first, then the upper edge filled down to it.
    fig.add_trace(go.Scatter(x=sizes, y=tr_mean - tr_std,
                             mode='lines', name='Training Score (Mean - Std)',
                             line=dict(color='rgba(255, 165, 0, 0.5)')))
    fig.add_trace(go.Scatter(x=sizes, y=tr_mean + tr_std,
                             mode='lines', name='Training Score (Mean + Std)',
                             line=dict(color='rgba(255, 165, 0, 0.5)'),
                             fill='tonexty'))
    # Cross-validation band, same construction.
    fig.add_trace(go.Scatter(x=sizes, y=cv_mean - cv_std,
                             mode='lines', name='Cross-validation Score (Mean - Std)',
                             line=dict(color='rgba(0, 128, 128, 0.5)')))
    fig.add_trace(go.Scatter(x=sizes, y=cv_mean + cv_std,
                             mode='lines', name='Cross-validation Score (Mean + Std)',
                             line=dict(color='rgba(0, 128, 128, 0.5)'),
                             fill='tonexty'))
    # Mean curves drawn last so they sit on top of the bands.
    fig.add_trace(go.Scatter(x=sizes, y=tr_mean,
                             mode='lines+markers', name='Training Score Mean',
                             line=dict(color='rgb(255, 165, 0)')))
    fig.add_trace(go.Scatter(x=sizes, y=cv_mean,
                             mode='lines+markers', name='Cross-validation Score Mean',
                             line=dict(color='rgb(0, 128, 128)')))
    fig.update_layout(title='Learning Curve',
                      xaxis_title='Training Examples',
                      yaxis_title='Score',
                      hovermode='closest')
    fig.show()
# Learning curve for the fitted MLP on the scaled training data. No `scoring`
# is passed, so learning_curve falls back to the estimator's default score
# (R^2 for regressors); retraining the MLP per fold/size makes this slow.
plot_learning_curve_interactive(mlp_regressor, X_train_scaled, y_train)
# Define models and their corresponding performance metrics
# NOTE(review): the Gradient Boosting MAE/RMSE below are byte-identical to the
# Random Forest values — upstream, the GB evaluation cell printed stale
# `mae`/`rmse` variables left over from the RF cell. Re-run that cell with
# freshly computed MAE/RMSE and update these hard-coded numbers.
models = ['Ridge Regression', 'Linear Regression', 'Decision Tree', 'Random Forest', 'Gradient Boosting', 'Neural Network']
mse_scores = [15576340795.818892, 15319379246.472343, 18647424274.05721, 9746868884.942074, 11475343277.93204, 15942561347.07892]
mae_scores = [81629.23186175199, 80348.82994515097, 27348.99365530303, 26766.682104552576, 26766.682104552576, 90547.34294127848]
rmse_scores = [124805.21141290091, 123771.47993973548, 136555.5721091498, 98726.2320001228, 98726.2320001228, 126263.85605975655]
r2_scores = [0.43448503949473927, 0.44381429097523406, 0.3229862173599225, 0.6461299707825127, 0.5833759426767295, 0.4211890283649865]
# Create traces for each performance metric
# NOTE(review): MSE is on the order of 1e10 while R2 lies in [0, 1]; grouping
# them on one linear y-axis makes every non-MSE bar invisible. Consider a
# log-scale axis or one subplot per metric.
mse_trace = go.Bar(x=models, y=mse_scores, name='MSE', marker=dict(color='blue'))
mae_trace = go.Bar(x=models, y=mae_scores, name='MAE', marker=dict(color='orange'))
rmse_trace = go.Bar(x=models, y=rmse_scores, name='RMSE', marker=dict(color='green'))
r2_trace = go.Bar(x=models, y=r2_scores, name='R2', marker=dict(color='red'))
# Create subplots
fig = go.Figure(data=[mse_trace, mae_trace, rmse_trace, r2_trace])
# Update layout for better presentation
fig.update_layout(
    title='Comparison of Performance Metrics Across Models',
    xaxis=dict(title='Models', tickangle=-45),
    yaxis=dict(title='Scores'),
    barmode='group',
    legend=dict(x=1, y=1),
    plot_bgcolor='rgba(0,0,0,0)'
)
# Show interactive plot
fig.show()
This notebook documents the process of building regression models for predicting continuous target variables. It outlines the steps taken to preprocess the data, explore its characteristics, engineer relevant features, build several regression models, and evaluate their performance.
Regression analysis is a statistical technique used to model the relationship between a dependent variable and one or more independent variables. In this project, we aim to develop regression models (Linear Regression, Ridge Regression, Decision Tree, Random Forest, Gradient Boosting, and a Neural Network) that can accurately predict continuous target variables (CO2 emission) based on the given features.
| Model | MSE | MAE | RMSE | R2 |
|---|---|---|---|---|
| Ridge Regression | 1.558e10 | 81629.23 | 124805.21 | 0.4345 |
| Linear Regression | 1.532e10 | 80348.83 | 123771.48 | 0.4438 |
| Decision Tree (Tuned) | 1.865e10 | 27348.99 | 136555.57 | 0.3230 |
| Random Forest (Tuned) | 9.747e9 | 26766.68 | 98726.23 | 0.6461 |
| Gradient Boosting (Tuned) | 1.148e10 | 26766.68 | 98726.23 | 0.5834 |
| Neural Network (Tuned) | 1.594e10 | 90547.34 | 126263.86 | 0.4212 |
From the table, we can observe that the tuned Random Forest achieves the highest R2 (0.646) and the lowest MSE and RMSE of all six models.
Overall, based on the provided metrics, Random Forest with tuned hyperparameters is the best-performing model for this dataset, followed by the tuned Gradient Boosting model (R2 = 0.583). However, further analysis such as cross-validation and testing on unseen data would provide more robust conclusions.
This documentation summarizes the process of building regression models for predicting continuous target variables. By following the steps outlined in this project, accurate and robust regression models can be developed for various real-world applications.